| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.8920877793436682, |
| "eval_steps": 100, |
| "global_step": 4431, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0, |
| "learning_rate": 0.0001999999444378697, |
| "loss": 0.7972, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 0.0001999997777515405, |
| "loss": 0.221, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 0.0001999994999411976, |
| "loss": 0.1313, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 0.00019999911100714978, |
| "loss": 0.1169, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 0.00019999861094982923, |
| "loss": 0.1353, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 0.00019999799976979157, |
| "loss": 0.1355, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 0.00019999727746771603, |
| "loss": 0.0991, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 0.00019999644404440528, |
| "loss": 0.0995, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 0.00019999549950078536, |
| "loss": 0.1175, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 0.000199994443837906, |
| "loss": 0.0941, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.02, |
| "eval_loss": 0.09952569007873535, |
| "eval_runtime": 659.9109, |
| "eval_samples_per_second": 11.15, |
| "eval_steps_per_second": 2.788, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 0.00019999327705694028, |
| "loss": 0.09, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 0.00019999199915918467, |
| "loss": 0.1044, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 0.00019999061014605934, |
| "loss": 0.0978, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 0.00019998911001910778, |
| "loss": 0.0963, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 0.000199987498779997, |
| "loss": 0.0955, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 0.00019998577643051744, |
| "loss": 0.0994, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 0.00019998394297258311, |
| "loss": 0.0921, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 0.0001999819984082314, |
| "loss": 0.087, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 0.00019997994273962316, |
| "loss": 0.0826, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 0.00019997777596904282, |
| "loss": 0.0903, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.04, |
| "eval_loss": 0.090216726064682, |
| "eval_runtime": 649.064, |
| "eval_samples_per_second": 11.336, |
| "eval_steps_per_second": 2.835, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 0.00019997549809889811, |
| "loss": 0.0949, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 0.00019997310913172028, |
| "loss": 0.0902, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 0.00019997060907016415, |
| "loss": 0.0792, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 0.0001999679979170078, |
| "loss": 0.0901, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 0.0001999652756751529, |
| "loss": 0.1035, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 0.00019996244234762452, |
| "loss": 0.0875, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 0.00019995949793757118, |
| "loss": 0.0718, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 0.00019995644244826481, |
| "loss": 0.0799, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 0.0001999532758831008, |
| "loss": 0.0767, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 0.000199949998245598, |
| "loss": 0.089, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.06, |
| "eval_loss": 0.08015166223049164, |
| "eval_runtime": 646.37, |
| "eval_samples_per_second": 11.384, |
| "eval_steps_per_second": 2.847, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 0.00019994660953939864, |
| "loss": 0.0833, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 0.00019994310976826837, |
| "loss": 0.0799, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 0.00019993949893609635, |
| "loss": 0.0747, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 0.00019993577704689505, |
| "loss": 0.0888, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 0.0001999319441048004, |
| "loss": 0.0872, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 0.00019992800011407172, |
| "loss": 0.0762, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 0.00019992394507909174, |
| "loss": 0.0746, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 0.00019991977900436658, |
| "loss": 0.0713, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 0.0001999155018945258, |
| "loss": 0.077, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 0.00019991111375432225, |
| "loss": 0.0619, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.08, |
| "eval_loss": 0.07623383402824402, |
| "eval_runtime": 654.0144, |
| "eval_samples_per_second": 11.251, |
| "eval_steps_per_second": 2.813, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 0.00019990661458863226, |
| "loss": 0.0702, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 0.0001999020044024555, |
| "loss": 0.0725, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 0.00019989728320091496, |
| "loss": 0.065, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 0.00019989245098925708, |
| "loss": 0.0632, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 0.00019988750777285162, |
| "loss": 0.0762, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 0.00019988245355719166, |
| "loss": 0.0736, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 0.0001998772883478937, |
| "loss": 0.0662, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 0.0001998720121506975, |
| "loss": 0.0765, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 0.00019986662497146622, |
| "loss": 0.0694, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 0.00019986112681618634, |
| "loss": 0.0722, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.1, |
| "eval_loss": 0.075162373483181, |
| "eval_runtime": 656.2929, |
| "eval_samples_per_second": 11.211, |
| "eval_steps_per_second": 2.804, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 0.0001998555176909676, |
| "loss": 0.073, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 0.00019984979760204313, |
| "loss": 0.0681, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 0.00019984396655576932, |
| "loss": 0.057, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 0.0001998380245586259, |
| "loss": 0.0689, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 0.00019983197161721583, |
| "loss": 0.057, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 0.00019982580773826545, |
| "loss": 0.0693, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 0.00019981953292862428, |
| "loss": 0.0702, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 0.0001998131471952652, |
| "loss": 0.0681, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 0.00019980665054528425, |
| "loss": 0.0657, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 0.00019980004298590081, |
| "loss": 0.0724, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.12, |
| "eval_loss": 0.0736309364438057, |
| "eval_runtime": 657.9967, |
| "eval_samples_per_second": 11.182, |
| "eval_steps_per_second": 2.796, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 0.00019979332452445752, |
| "loss": 0.0599, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 0.00019978649516842016, |
| "loss": 0.0697, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 0.00019977955492537787, |
| "loss": 0.0715, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 0.00019977250380304287, |
| "loss": 0.0589, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 0.0001997653418092507, |
| "loss": 0.0722, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 0.00019975806895196008, |
| "loss": 0.0611, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 0.0001997506852392529, |
| "loss": 0.056, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 0.0001997431906793343, |
| "loss": 0.0614, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 0.00019973558528053252, |
| "loss": 0.0537, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 0.000199727869051299, |
| "loss": 0.0762, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.14, |
| "eval_loss": 0.07101555913686752, |
| "eval_runtime": 665.8897, |
| "eval_samples_per_second": 11.05, |
| "eval_steps_per_second": 2.763, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 0.00019972004200020832, |
| "loss": 0.0634, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 0.0001997121041359583, |
| "loss": 0.0697, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 0.00019970405546736978, |
| "loss": 0.0605, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 0.00019969589600338678, |
| "loss": 0.0598, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 0.00019968762575307649, |
| "loss": 0.066, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 0.00019967924472562914, |
| "loss": 0.0656, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 0.00019967075293035805, |
| "loss": 0.0614, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 0.0001996621503766997, |
| "loss": 0.0633, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 0.00019965343707421362, |
| "loss": 0.0661, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 0.00019964461303258242, |
| "loss": 0.0567, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.16, |
| "eval_loss": 0.07119747996330261, |
| "eval_runtime": 657.3478, |
| "eval_samples_per_second": 11.193, |
| "eval_steps_per_second": 2.799, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 0.00019963567826161168, |
| "loss": 0.0577, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 0.00019962663277123016, |
| "loss": 0.0633, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 0.0001996174765714896, |
| "loss": 0.0701, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 0.0001996082096725647, |
| "loss": 0.0715, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 0.0001995988320847533, |
| "loss": 0.0618, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 0.00019958934381847612, |
| "loss": 0.0589, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 0.00019957974488427698, |
| "loss": 0.0658, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 0.00019957003529282255, |
| "loss": 0.0549, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 0.00019956021505490262, |
| "loss": 0.0664, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 0.0001995502841814298, |
| "loss": 0.0578, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.18, |
| "eval_loss": 0.06790292263031006, |
| "eval_runtime": 663.3404, |
| "eval_samples_per_second": 11.092, |
| "eval_steps_per_second": 2.774, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 0.00019954024268343975, |
| "loss": 0.0626, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 0.000199530090572091, |
| "loss": 0.0621, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 0.00019951982785866492, |
| "loss": 0.0527, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 0.00019950945455456597, |
| "loss": 0.0636, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 0.00019949897067132142, |
| "loss": 0.0767, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 0.00019948837622058135, |
| "loss": 0.0785, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 0.00019947767121411878, |
| "loss": 0.0698, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 0.0001994668556638296, |
| "loss": 0.0576, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 0.00019945592958173247, |
| "loss": 0.0616, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 0.00019944489297996892, |
| "loss": 0.0597, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.2, |
| "eval_loss": 0.06911394000053406, |
| "eval_runtime": 654.3952, |
| "eval_samples_per_second": 11.244, |
| "eval_steps_per_second": 2.812, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 0.00019943374587080333, |
| "loss": 0.0674, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 0.0001994224882666228, |
| "loss": 0.0528, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 0.0001994111201799373, |
| "loss": 0.0602, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 0.00019939964162337946, |
| "loss": 0.0568, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 0.00019938805260970486, |
| "loss": 0.0571, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 0.0001993763531517916, |
| "loss": 0.0598, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 0.00019936454326264068, |
| "loss": 0.0636, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 0.00019935262295537568, |
| "loss": 0.0602, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 0.00019934059224324303, |
| "loss": 0.0763, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 0.00019932845113961172, |
| "loss": 0.0538, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.22, |
| "eval_loss": 0.06597457081079483, |
| "eval_runtime": 653.2731, |
| "eval_samples_per_second": 11.263, |
| "eval_steps_per_second": 2.817, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 0.0001993161996579735, |
| "loss": 0.0578, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 0.00019930383781194272, |
| "loss": 0.0561, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 0.00019929136561525637, |
| "loss": 0.0558, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 0.0001992787830817741, |
| "loss": 0.0669, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 0.00019926609022547818, |
| "loss": 0.0616, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 0.00019925328706047344, |
| "loss": 0.0637, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 0.00019924037360098727, |
| "loss": 0.052, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 0.0001992273498613697, |
| "loss": 0.0548, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 0.00019921421585609325, |
| "loss": 0.0545, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 0.000199200971599753, |
| "loss": 0.0498, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.24, |
| "eval_loss": 0.06580409407615662, |
| "eval_runtime": 655.2658, |
| "eval_samples_per_second": 11.229, |
| "eval_steps_per_second": 2.808, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 0.00019918761710706649, |
| "loss": 0.0537, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 0.00019917415239287386, |
| "loss": 0.0579, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 0.00019916057747213758, |
| "loss": 0.053, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 0.00019914689235994278, |
| "loss": 0.0562, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 0.00019913309707149694, |
| "loss": 0.0595, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 0.00019911919162212988, |
| "loss": 0.0515, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 0.000199105176027294, |
| "loss": 0.053, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 0.00019909105030256402, |
| "loss": 0.065, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 0.000199076814463637, |
| "loss": 0.0574, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 0.0001990624685263325, |
| "loss": 0.0595, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.26, |
| "eval_loss": 0.06419895589351654, |
| "eval_runtime": 662.3031, |
| "eval_samples_per_second": 11.11, |
| "eval_steps_per_second": 2.778, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 0.00019904801250659223, |
| "loss": 0.0667, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 0.00019903344642048043, |
| "loss": 0.0562, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 0.00019901877028418347, |
| "loss": 0.0604, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 0.00019900398411401015, |
| "loss": 0.0474, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 0.0001989890879263915, |
| "loss": 0.0444, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 0.0001989740817378808, |
| "loss": 0.0522, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 0.00019895896556515352, |
| "loss": 0.0543, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 0.00019894373942500743, |
| "loss": 0.0447, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 0.00019892840333436245, |
| "loss": 0.0454, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 0.00019891295731026075, |
| "loss": 0.0465, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.28, |
| "eval_loss": 0.0676579549908638, |
| "eval_runtime": 660.0804, |
| "eval_samples_per_second": 11.147, |
| "eval_steps_per_second": 2.788, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 0.0001988974013698665, |
| "loss": 0.0533, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 0.00019888173553046624, |
| "loss": 0.0522, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 0.00019886595980946842, |
| "loss": 0.0592, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 0.00019885007422440379, |
| "loss": 0.0584, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 0.00019883407879292498, |
| "loss": 0.054, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 0.00019881797353280695, |
| "loss": 0.0442, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 0.00019880175846194638, |
| "loss": 0.0659, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 0.00019878543359836223, |
| "loss": 0.0483, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 0.0001987689989601954, |
| "loss": 0.0525, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 0.00019875245456570873, |
| "loss": 0.0533, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.3, |
| "eval_loss": 0.06512827426195145, |
| "eval_runtime": 659.6413, |
| "eval_samples_per_second": 11.155, |
| "eval_steps_per_second": 2.789, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 0.00019873580043328705, |
| "loss": 0.0438, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 0.00019871903658143719, |
| "loss": 0.0542, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 0.0001987021630287878, |
| "loss": 0.0555, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 0.00019868517979408954, |
| "loss": 0.0561, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 0.00019866808689621482, |
| "loss": 0.0517, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 0.0001986508843541581, |
| "loss": 0.051, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 0.0001986335721870355, |
| "loss": 0.0525, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 0.0001986161504140851, |
| "loss": 0.0443, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 0.00019859861905466668, |
| "loss": 0.0542, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 0.00019858097812826183, |
| "loss": 0.0593, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.32, |
| "eval_loss": 0.06405352801084518, |
| "eval_runtime": 658.5179, |
| "eval_samples_per_second": 11.174, |
| "eval_steps_per_second": 2.794, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 0.0001985632276544739, |
| "loss": 0.0527, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 0.000198545367653028, |
| "loss": 0.0422, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 0.00019852739814377087, |
| "loss": 0.0533, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 0.00019850931914667107, |
| "loss": 0.0507, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 0.00019849113068181869, |
| "loss": 0.0581, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 0.00019847283276942554, |
| "loss": 0.0583, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 0.00019845442542982506, |
| "loss": 0.0562, |
| "step": 1670 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 0.00019843590868347225, |
| "loss": 0.0499, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 0.00019841728255094374, |
| "loss": 0.0543, |
| "step": 1690 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 0.00019839854705293764, |
| "loss": 0.055, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.34, |
| "eval_loss": 0.0653354823589325, |
| "eval_runtime": 674.0253, |
| "eval_samples_per_second": 10.917, |
| "eval_steps_per_second": 2.73, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 0.00019837970221027365, |
| "loss": 0.0422, |
| "step": 1710 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 0.00019836074804389296, |
| "loss": 0.0586, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 0.00019834168457485824, |
| "loss": 0.0408, |
| "step": 1730 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 0.00019832251182435367, |
| "loss": 0.0587, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 0.00019830322981368478, |
| "loss": 0.0498, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 0.0001982838385642786, |
| "loss": 0.0475, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 0.00019826433809768345, |
| "loss": 0.0571, |
| "step": 1770 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 0.00019824472843556914, |
| "loss": 0.0512, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 0.00019822500959972673, |
| "loss": 0.0477, |
| "step": 1790 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 0.00019820518161206864, |
| "loss": 0.0546, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.36, |
| "eval_loss": 0.06339588016271591, |
| "eval_runtime": 1055.2159, |
| "eval_samples_per_second": 6.973, |
| "eval_steps_per_second": 1.744, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 0.00019818524449462862, |
| "loss": 0.0463, |
| "step": 1810 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 0.00019816519826956156, |
| "loss": 0.0571, |
| "step": 1820 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 0.00019814504295914372, |
| "loss": 0.0516, |
| "step": 1830 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 0.00019812477858577258, |
| "loss": 0.0511, |
| "step": 1840 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 0.00019810440517196672, |
| "loss": 0.0471, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 0.00019808392274036596, |
| "loss": 0.0497, |
| "step": 1860 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 0.00019806333131373126, |
| "loss": 0.0608, |
| "step": 1870 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 0.00019804263091494466, |
| "loss": 0.0431, |
| "step": 1880 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 0.00019802182156700938, |
| "loss": 0.0558, |
| "step": 1890 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 0.00019800090329304956, |
| "loss": 0.0524, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.38, |
| "eval_loss": 0.06149962544441223, |
| "eval_runtime": 1053.8269, |
| "eval_samples_per_second": 6.982, |
| "eval_steps_per_second": 1.746, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 0.00019797987611631059, |
| "loss": 0.0528, |
| "step": 1910 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 0.00019795874006015872, |
| "loss": 0.0481, |
| "step": 1920 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 0.0001979374951480812, |
| "loss": 0.0491, |
| "step": 1930 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 0.00019791614140368633, |
| "loss": 0.049, |
| "step": 1940 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 0.00019789467885070327, |
| "loss": 0.0444, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 0.00019787310751298213, |
| "loss": 0.0466, |
| "step": 1960 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 0.00019785142741449393, |
| "loss": 0.0498, |
| "step": 1970 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 0.00019782963857933048, |
| "loss": 0.0475, |
| "step": 1980 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 0.00019780774103170446, |
| "loss": 0.047, |
| "step": 1990 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 0.00019778573479594942, |
| "loss": 0.0432, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.4, |
| "eval_loss": 0.06317023187875748, |
| "eval_runtime": 1054.8968, |
| "eval_samples_per_second": 6.975, |
| "eval_steps_per_second": 1.744, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 0.00019776361989651956, |
| "loss": 0.0546, |
| "step": 2010 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 0.0001977413963579899, |
| "loss": 0.043, |
| "step": 2020 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 0.00019771906420505624, |
| "loss": 0.0579, |
| "step": 2030 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 0.00019769662346253493, |
| "loss": 0.057, |
| "step": 2040 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 0.00019767407415536317, |
| "loss": 0.0476, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 0.00019765141630859865, |
| "loss": 0.0418, |
| "step": 2060 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 0.00019762864994741976, |
| "loss": 0.0497, |
| "step": 2070 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 0.00019760577509712546, |
| "loss": 0.0493, |
| "step": 2080 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 0.00019758279178313525, |
| "loss": 0.0512, |
| "step": 2090 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 0.00019755970003098916, |
| "loss": 0.0631, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.42, |
| "eval_loss": 0.06188439950346947, |
| "eval_runtime": 1062.072, |
| "eval_samples_per_second": 6.928, |
| "eval_steps_per_second": 1.732, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 0.00019753649986634772, |
| "loss": 0.0543, |
| "step": 2110 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 0.00019751319131499194, |
| "loss": 0.0405, |
| "step": 2120 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 0.00019748977440282333, |
| "loss": 0.0564, |
| "step": 2130 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 0.0001974662491558637, |
| "loss": 0.0396, |
| "step": 2140 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 0.00019744261560025533, |
| "loss": 0.039, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 0.00019741887376226083, |
| "loss": 0.0348, |
| "step": 2160 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 0.00019739502366826313, |
| "loss": 0.0533, |
| "step": 2170 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 0.0001973710653447655, |
| "loss": 0.0417, |
| "step": 2180 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 0.0001973469988183914, |
| "loss": 0.0502, |
| "step": 2190 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 0.00019732282411588463, |
| "loss": 0.0519, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.44, |
| "eval_loss": 0.05978045240044594, |
| "eval_runtime": 1059.6808, |
| "eval_samples_per_second": 6.944, |
| "eval_steps_per_second": 1.736, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 0.00019729854126410913, |
| "loss": 0.0416, |
| "step": 2210 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 0.00019727415029004906, |
| "loss": 0.0586, |
| "step": 2220 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 0.00019724965122080868, |
| "loss": 0.0535, |
| "step": 2230 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 0.0001972250440836124, |
| "loss": 0.0499, |
| "step": 2240 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 0.00019720032890580474, |
| "loss": 0.0484, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 0.00019717550571485024, |
| "loss": 0.044, |
| "step": 2260 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 0.0001971505745383335, |
| "loss": 0.0496, |
| "step": 2270 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 0.00019712553540395908, |
| "loss": 0.05, |
| "step": 2280 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 0.0001971003883395516, |
| "loss": 0.0387, |
| "step": 2290 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 0.00019707513337305547, |
| "loss": 0.0397, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.46, |
| "eval_loss": 0.060694798827171326, |
| "eval_runtime": 1049.503, |
| "eval_samples_per_second": 7.011, |
| "eval_steps_per_second": 1.753, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 0.0001970497705325351, |
| "loss": 0.0475, |
| "step": 2310 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 0.00019702429984617484, |
| "loss": 0.0405, |
| "step": 2320 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 0.00019699872134227867, |
| "loss": 0.0447, |
| "step": 2330 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 0.00019697303504927061, |
| "loss": 0.0608, |
| "step": 2340 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 0.00019694724099569434, |
| "loss": 0.0478, |
| "step": 2350 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 0.00019692133921021332, |
| "loss": 0.0432, |
| "step": 2360 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 0.00019689532972161068, |
| "loss": 0.0492, |
| "step": 2370 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 0.00019686921255878932, |
| "loss": 0.0441, |
| "step": 2380 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 0.0001968429877507717, |
| "loss": 0.0603, |
| "step": 2390 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 0.00019681665532669996, |
| "loss": 0.0467, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.48, |
| "eval_loss": 0.06163698434829712, |
| "eval_runtime": 1049.1311, |
| "eval_samples_per_second": 7.013, |
| "eval_steps_per_second": 1.754, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 0.00019679021531583584, |
| "loss": 0.0406, |
| "step": 2410 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 0.00019676366774756056, |
| "loss": 0.0484, |
| "step": 2420 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 0.00019673701265137495, |
| "loss": 0.0402, |
| "step": 2430 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 0.00019671025005689926, |
| "loss": 0.0541, |
| "step": 2440 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 0.00019668337999387324, |
| "loss": 0.0438, |
| "step": 2450 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 0.00019665640249215605, |
| "loss": 0.045, |
| "step": 2460 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 0.0001966293175817262, |
| "loss": 0.0456, |
| "step": 2470 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 0.00019660212529268168, |
| "loss": 0.0454, |
| "step": 2480 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 0.00019657482565523963, |
| "loss": 0.0464, |
| "step": 2490 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 0.00019654741869973663, |
| "loss": 0.049, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.5, |
| "eval_loss": 0.06358644366264343, |
| "eval_runtime": 1050.5846, |
| "eval_samples_per_second": 7.004, |
| "eval_steps_per_second": 1.751, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 0.00019651990445662841, |
| "loss": 0.053, |
| "step": 2510 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 0.00019649228295649004, |
| "loss": 0.05, |
| "step": 2520 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 0.00019646455423001565, |
| "loss": 0.0423, |
| "step": 2530 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 0.0001964367183080186, |
| "loss": 0.0556, |
| "step": 2540 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 0.00019640877522143134, |
| "loss": 0.0431, |
| "step": 2550 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 0.0001963807250013054, |
| "loss": 0.0377, |
| "step": 2560 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 0.00019635256767881144, |
| "loss": 0.0432, |
| "step": 2570 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 0.00019632430328523902, |
| "loss": 0.0562, |
| "step": 2580 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 0.0001962959318519968, |
| "loss": 0.04, |
| "step": 2590 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 0.00019626745341061225, |
| "loss": 0.0488, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.52, |
| "eval_loss": 0.06040577217936516, |
| "eval_runtime": 1049.7742, |
| "eval_samples_per_second": 7.009, |
| "eval_steps_per_second": 1.753, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 0.0001962388679927319, |
| "loss": 0.0371, |
| "step": 2610 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 0.000196210175630121, |
| "loss": 0.0412, |
| "step": 2620 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 0.00019618137635466382, |
| "loss": 0.0432, |
| "step": 2630 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 0.00019615247019836327, |
| "loss": 0.0501, |
| "step": 2640 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 0.00019612345719334116, |
| "loss": 0.0422, |
| "step": 2650 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 0.00019609433737183791, |
| "loss": 0.0511, |
| "step": 2660 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 0.00019606511076621276, |
| "loss": 0.0372, |
| "step": 2670 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 0.00019603577740894354, |
| "loss": 0.0541, |
| "step": 2680 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 0.0001960063373326267, |
| "loss": 0.0472, |
| "step": 2690 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 0.00019597679056997737, |
| "loss": 0.0449, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.54, |
| "eval_loss": 0.05977201834321022, |
| "eval_runtime": 1052.2024, |
| "eval_samples_per_second": 6.993, |
| "eval_steps_per_second": 1.749, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 0.00019594713715382915, |
| "loss": 0.0517, |
| "step": 2710 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 0.00019591737711713414, |
| "loss": 0.0476, |
| "step": 2720 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 0.00019588751049296298, |
| "loss": 0.048, |
| "step": 2730 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 0.00019585753731450478, |
| "loss": 0.0342, |
| "step": 2740 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 0.00019582745761506697, |
| "loss": 0.0508, |
| "step": 2750 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 0.00019579727142807535, |
| "loss": 0.0407, |
| "step": 2760 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 0.0001957669787870742, |
| "loss": 0.0358, |
| "step": 2770 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 0.00019573657972572593, |
| "loss": 0.0506, |
| "step": 2780 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 0.00019570607427781128, |
| "loss": 0.0476, |
| "step": 2790 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 0.0001956754624772292, |
| "loss": 0.0438, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.56, |
| "eval_loss": 0.061029911041259766, |
| "eval_runtime": 1062.1739, |
| "eval_samples_per_second": 6.927, |
| "eval_steps_per_second": 1.732, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 0.0001956447443579968, |
| "loss": 0.0481, |
| "step": 2810 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 0.00019561391995424941, |
| "loss": 0.0515, |
| "step": 2820 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 0.00019558298930024044, |
| "loss": 0.0491, |
| "step": 2830 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 0.0001955519524303413, |
| "loss": 0.0367, |
| "step": 2840 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 0.0001955208093790415, |
| "loss": 0.0431, |
| "step": 2850 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 0.0001954895601809485, |
| "loss": 0.0436, |
| "step": 2860 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 0.0001954582048707878, |
| "loss": 0.0442, |
| "step": 2870 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 0.00019542674348340267, |
| "loss": 0.0532, |
| "step": 2880 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 0.00019539517605375446, |
| "loss": 0.0441, |
| "step": 2890 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 0.00019536350261692214, |
| "loss": 0.036, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.58, |
| "eval_loss": 0.06326338648796082, |
| "eval_runtime": 1061.8457, |
| "eval_samples_per_second": 6.929, |
| "eval_steps_per_second": 1.733, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 0.00019533172320810265, |
| "loss": 0.0486, |
| "step": 2910 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 0.00019529983786261058, |
| "loss": 0.0568, |
| "step": 2920 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 0.00019526784661587829, |
| "loss": 0.0434, |
| "step": 2930 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 0.0001952357495034558, |
| "loss": 0.0541, |
| "step": 2940 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 0.00019520354656101085, |
| "loss": 0.0395, |
| "step": 2950 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 0.00019517123782432868, |
| "loss": 0.0458, |
| "step": 2960 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 0.00019513882332931212, |
| "loss": 0.0395, |
| "step": 2970 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 0.00019510630311198157, |
| "loss": 0.0442, |
| "step": 2980 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 0.00019507367720847488, |
| "loss": 0.041, |
| "step": 2990 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 0.00019504094565504733, |
| "loss": 0.0464, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.6, |
| "eval_loss": 0.06028781458735466, |
| "eval_runtime": 1035.9718, |
| "eval_samples_per_second": 7.103, |
| "eval_steps_per_second": 1.776, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 0.00019500810848807162, |
| "loss": 0.0396, |
| "step": 3010 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 0.00019497516574403778, |
| "loss": 0.0491, |
| "step": 3020 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 0.00019494211745955324, |
| "loss": 0.0585, |
| "step": 3030 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 0.00019490896367134266, |
| "loss": 0.0484, |
| "step": 3040 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 0.00019487570441624791, |
| "loss": 0.0507, |
| "step": 3050 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 0.0001948423397312281, |
| "loss": 0.0467, |
| "step": 3060 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 0.0001948088696533595, |
| "loss": 0.0444, |
| "step": 3070 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 0.00019477529421983546, |
| "loss": 0.0498, |
| "step": 3080 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 0.0001947416134679665, |
| "loss": 0.0373, |
| "step": 3090 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 0.00019470782743518002, |
| "loss": 0.0437, |
| "step": 3100 |
| }, |
| { |
| "epoch": 0.62, |
| "eval_loss": 0.06118550896644592, |
| "eval_runtime": 1055.3909, |
| "eval_samples_per_second": 6.972, |
| "eval_steps_per_second": 1.743, |
| "step": 3100 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 0.00019467393615902055, |
| "loss": 0.0391, |
| "step": 3110 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 0.0001946399396771495, |
| "loss": 0.0456, |
| "step": 3120 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 0.00019460583802734523, |
| "loss": 0.0368, |
| "step": 3130 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 0.0001945716312475029, |
| "loss": 0.0451, |
| "step": 3140 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 0.0001945373193756346, |
| "loss": 0.046, |
| "step": 3150 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 0.00019450290244986914, |
| "loss": 0.0364, |
| "step": 3160 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 0.00019446838050845205, |
| "loss": 0.0576, |
| "step": 3170 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 0.00019443375358974555, |
| "loss": 0.0421, |
| "step": 3180 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 0.00019439902173222859, |
| "loss": 0.0528, |
| "step": 3190 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 0.0001943641849744967, |
| "loss": 0.0389, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.64, |
| "eval_loss": 0.06049993634223938, |
| "eval_runtime": 1052.016, |
| "eval_samples_per_second": 6.994, |
| "eval_steps_per_second": 1.749, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 0.00019432924335526194, |
| "loss": 0.0432, |
| "step": 3210 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 0.00019429419691335297, |
| "loss": 0.0339, |
| "step": 3220 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 0.00019425904568771483, |
| "loss": 0.0434, |
| "step": 3230 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 0.00019422378971740907, |
| "loss": 0.0472, |
| "step": 3240 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 0.00019418842904161368, |
| "loss": 0.0394, |
| "step": 3250 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 0.00019415296369962288, |
| "loss": 0.0404, |
| "step": 3260 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 0.00019411739373084732, |
| "loss": 0.0352, |
| "step": 3270 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 0.00019408171917481386, |
| "loss": 0.0364, |
| "step": 3280 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 0.00019404594007116555, |
| "loss": 0.0433, |
| "step": 3290 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 0.00019401005645966167, |
| "loss": 0.0377, |
| "step": 3300 |
| }, |
| { |
| "epoch": 0.66, |
| "eval_loss": 0.06322025507688522, |
| "eval_runtime": 1050.1226, |
| "eval_samples_per_second": 7.007, |
| "eval_steps_per_second": 1.752, |
| "step": 3300 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 0.00019397406838017766, |
| "loss": 0.042, |
| "step": 3310 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 0.00019393797587270497, |
| "loss": 0.0326, |
| "step": 3320 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 0.00019390177897735114, |
| "loss": 0.0443, |
| "step": 3330 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 0.00019386547773433965, |
| "loss": 0.0545, |
| "step": 3340 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 0.00019382907218401006, |
| "loss": 0.0422, |
| "step": 3350 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 0.00019379256236681775, |
| "loss": 0.0393, |
| "step": 3360 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 0.000193755948323334, |
| "loss": 0.0516, |
| "step": 3370 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 0.00019371923009424587, |
| "loss": 0.0499, |
| "step": 3380 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 0.0001936824077203562, |
| "loss": 0.0491, |
| "step": 3390 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 0.0001936454812425836, |
| "loss": 0.0382, |
| "step": 3400 |
| }, |
| { |
| "epoch": 0.68, |
| "eval_loss": 0.06261178106069565, |
| "eval_runtime": 1054.0723, |
| "eval_samples_per_second": 6.981, |
| "eval_steps_per_second": 1.746, |
| "step": 3400 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 0.00019360845070196236, |
| "loss": 0.0431, |
| "step": 3410 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 0.0001935713161396424, |
| "loss": 0.0498, |
| "step": 3420 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 0.0001935340775968892, |
| "loss": 0.0457, |
| "step": 3430 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 0.00019349673511508383, |
| "loss": 0.0362, |
| "step": 3440 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 0.00019345928873572282, |
| "loss": 0.0432, |
| "step": 3450 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 0.00019342173850041822, |
| "loss": 0.0365, |
| "step": 3460 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 0.00019338408445089745, |
| "loss": 0.0358, |
| "step": 3470 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 0.0001933463266290033, |
| "loss": 0.039, |
| "step": 3480 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 0.00019330846507669382, |
| "loss": 0.0412, |
| "step": 3490 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 0.00019327049983604245, |
| "loss": 0.051, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.7, |
| "eval_loss": 0.061244383454322815, |
| "eval_runtime": 1045.5529, |
| "eval_samples_per_second": 7.037, |
| "eval_steps_per_second": 1.76, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 0.00019323243094923772, |
| "loss": 0.0387, |
| "step": 3510 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 0.00019319425845858341, |
| "loss": 0.0473, |
| "step": 3520 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 0.00019315598240649847, |
| "loss": 0.037, |
| "step": 3530 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 0.0001931176028355168, |
| "loss": 0.0435, |
| "step": 3540 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 0.00019307911978828747, |
| "loss": 0.0364, |
| "step": 3550 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 0.0001930405333075745, |
| "loss": 0.0424, |
| "step": 3560 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 0.00019300184343625678, |
| "loss": 0.0348, |
| "step": 3570 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 0.00019296305021732817, |
| "loss": 0.0395, |
| "step": 3580 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 0.00019292415369389734, |
| "loss": 0.0486, |
| "step": 3590 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 0.00019288515390918776, |
| "loss": 0.047, |
| "step": 3600 |
| }, |
| { |
| "epoch": 0.72, |
| "eval_loss": 0.06326541304588318, |
| "eval_runtime": 1046.8433, |
| "eval_samples_per_second": 7.029, |
| "eval_steps_per_second": 1.758, |
| "step": 3600 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 0.00019284605090653766, |
| "loss": 0.0403, |
| "step": 3610 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 0.00019280684472939994, |
| "loss": 0.0381, |
| "step": 3620 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 0.00019276753542134224, |
| "loss": 0.0332, |
| "step": 3630 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 0.00019272812302604665, |
| "loss": 0.0435, |
| "step": 3640 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 0.00019268860758730997, |
| "loss": 0.0512, |
| "step": 3650 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 0.00019264898914904342, |
| "loss": 0.0403, |
| "step": 3660 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 0.00019260926775527265, |
| "loss": 0.0336, |
| "step": 3670 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 0.00019256944345013785, |
| "loss": 0.0425, |
| "step": 3680 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 0.00019252951627789344, |
| "loss": 0.0431, |
| "step": 3690 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 0.00019248948628290818, |
| "loss": 0.0451, |
| "step": 3700 |
| }, |
| { |
| "epoch": 0.74, |
| "eval_loss": 0.06083804368972778, |
| "eval_runtime": 1049.5583, |
| "eval_samples_per_second": 7.011, |
| "eval_steps_per_second": 1.753, |
| "step": 3700 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 0.00019244935350966514, |
| "loss": 0.032, |
| "step": 3710 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 0.00019240911800276153, |
| "loss": 0.0378, |
| "step": 3720 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 0.0001923687798069088, |
| "loss": 0.044, |
| "step": 3730 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 0.00019232833896693242, |
| "loss": 0.0363, |
| "step": 3740 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 0.00019228779552777202, |
| "loss": 0.0412, |
| "step": 3750 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 0.0001922471495344812, |
| "loss": 0.0376, |
| "step": 3760 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 0.0001922064010322275, |
| "loss": 0.0451, |
| "step": 3770 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 0.00019216555006629237, |
| "loss": 0.0443, |
| "step": 3780 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 0.0001921245966820712, |
| "loss": 0.0394, |
| "step": 3790 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 0.00019208354092507305, |
| "loss": 0.0472, |
| "step": 3800 |
| }, |
| { |
| "epoch": 0.77, |
| "eval_loss": 0.06302132457494736, |
| "eval_runtime": 1043.9496, |
| "eval_samples_per_second": 7.048, |
| "eval_steps_per_second": 1.763, |
| "step": 3800 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 0.00019204238284092093, |
| "loss": 0.0418, |
| "step": 3810 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 0.00019200112247535141, |
| "loss": 0.0416, |
| "step": 3820 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 0.00019195975987421472, |
| "loss": 0.0418, |
| "step": 3830 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 0.00019191829508347481, |
| "loss": 0.0394, |
| "step": 3840 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 0.00019187672814920912, |
| "loss": 0.0405, |
| "step": 3850 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 0.00019183505911760855, |
| "loss": 0.0306, |
| "step": 3860 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 0.00019179328803497754, |
| "loss": 0.0443, |
| "step": 3870 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 0.0001917514149477339, |
| "loss": 0.035, |
| "step": 3880 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 0.00019170943990240877, |
| "loss": 0.0371, |
| "step": 3890 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 0.0001916673629456466, |
| "loss": 0.0394, |
| "step": 3900 |
| }, |
| { |
| "epoch": 0.79, |
| "eval_loss": 0.06118810176849365, |
| "eval_runtime": 1046.2783, |
| "eval_samples_per_second": 7.033, |
| "eval_steps_per_second": 1.759, |
| "step": 3900 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 0.00019162518412420512, |
| "loss": 0.0337, |
| "step": 3910 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 0.00019158290348495524, |
| "loss": 0.0433, |
| "step": 3920 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 0.000191540521074881, |
| "loss": 0.0402, |
| "step": 3930 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 0.0001914980369410795, |
| "loss": 0.0326, |
| "step": 3940 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 0.00019145545113076096, |
| "loss": 0.0501, |
| "step": 3950 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 0.00019141276369124855, |
| "loss": 0.0448, |
| "step": 3960 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 0.00019136997466997837, |
| "loss": 0.0403, |
| "step": 3970 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 0.00019132708411449936, |
| "loss": 0.0427, |
| "step": 3980 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 0.0001912840920724734, |
| "loss": 0.0322, |
| "step": 3990 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 0.00019124099859167503, |
| "loss": 0.0471, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.81, |
| "eval_loss": 0.06139204651117325, |
| "eval_runtime": 1057.3652, |
| "eval_samples_per_second": 6.959, |
| "eval_steps_per_second": 1.74, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 0.00019119780371999162, |
| "loss": 0.0306, |
| "step": 4010 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 0.00019115450750542304, |
| "loss": 0.0352, |
| "step": 4020 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 0.000191111109996082, |
| "loss": 0.0351, |
| "step": 4030 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 0.00019106761124019364, |
| "loss": 0.0295, |
| "step": 4040 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 0.00019102401128609557, |
| "loss": 0.0379, |
| "step": 4050 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 0.00019098031018223796, |
| "loss": 0.048, |
| "step": 4060 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 0.00019093650797718338, |
| "loss": 0.0454, |
| "step": 4070 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 0.00019089260471960663, |
| "loss": 0.0463, |
| "step": 4080 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 0.0001908486004582949, |
| "loss": 0.0447, |
| "step": 4090 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 0.00019080449524214762, |
| "loss": 0.031, |
| "step": 4100 |
| }, |
| { |
| "epoch": 0.83, |
| "eval_loss": 0.06018984317779541, |
| "eval_runtime": 1049.9509, |
| "eval_samples_per_second": 7.008, |
| "eval_steps_per_second": 1.752, |
| "step": 4100 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 0.00019076028912017642, |
| "loss": 0.0292, |
| "step": 4110 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 0.00019071598214150494, |
| "loss": 0.0252, |
| "step": 4120 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 0.00019067157435536904, |
| "loss": 0.0382, |
| "step": 4130 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 0.00019062706581111653, |
| "loss": 0.0303, |
| "step": 4140 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 0.0001905824565582072, |
| "loss": 0.0419, |
| "step": 4150 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 0.00019053774664621272, |
| "loss": 0.0358, |
| "step": 4160 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 0.00019049293612481675, |
| "loss": 0.037, |
| "step": 4170 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 0.00019044802504381453, |
| "loss": 0.0306, |
| "step": 4180 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 0.00019040301345311326, |
| "loss": 0.0431, |
| "step": 4190 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 0.0001903579014027317, |
| "loss": 0.0405, |
| "step": 4200 |
| }, |
| { |
| "epoch": 0.85, |
| "eval_loss": 0.05838855355978012, |
| "eval_runtime": 1051.4215, |
| "eval_samples_per_second": 6.998, |
| "eval_steps_per_second": 1.75, |
| "step": 4200 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 0.00019031268894280023, |
| "loss": 0.0334, |
| "step": 4210 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 0.00019026737612356094, |
| "loss": 0.0382, |
| "step": 4220 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 0.00019022196299536733, |
| "loss": 0.0371, |
| "step": 4230 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 0.00019017644960868445, |
| "loss": 0.0353, |
| "step": 4240 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 0.00019013083601408863, |
| "loss": 0.0368, |
| "step": 4250 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 0.0001900851222622677, |
| "loss": 0.0427, |
| "step": 4260 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 0.00019003930840402072, |
| "loss": 0.031, |
| "step": 4270 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 0.00018999339449025796, |
| "loss": 0.0377, |
| "step": 4280 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 0.00018994738057200099, |
| "loss": 0.0373, |
| "step": 4290 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 0.0001899012667003824, |
| "loss": 0.0421, |
| "step": 4300 |
| }, |
| { |
| "epoch": 0.87, |
| "eval_loss": 0.05914044752717018, |
| "eval_runtime": 1056.4599, |
| "eval_samples_per_second": 6.965, |
| "eval_steps_per_second": 1.742, |
| "step": 4300 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 0.00018985505292664587, |
| "loss": 0.0344, |
| "step": 4310 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 0.00018980873930214614, |
| "loss": 0.0355, |
| "step": 4320 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 0.00018976232587834886, |
| "loss": 0.0416, |
| "step": 4330 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 0.00018971581270683062, |
| "loss": 0.0395, |
| "step": 4340 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 0.00018966919983927886, |
| "loss": 0.0366, |
| "step": 4350 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 0.00018962248732749175, |
| "loss": 0.0368, |
| "step": 4360 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 0.0001895756752233782, |
| "loss": 0.0362, |
| "step": 4370 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 0.0001895287635789579, |
| "loss": 0.0395, |
| "step": 4380 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 0.00018948175244636097, |
| "loss": 0.0462, |
| "step": 4390 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 0.00018943464187782828, |
| "loss": 0.0454, |
| "step": 4400 |
| }, |
| { |
| "epoch": 0.89, |
| "eval_loss": 0.05928103253245354, |
| "eval_runtime": 1029.6162, |
| "eval_samples_per_second": 7.146, |
| "eval_steps_per_second": 1.787, |
| "step": 4400 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 0.000189387431925711, |
| "loss": 0.0326, |
| "step": 4410 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 0.0001893401226424709, |
| "loss": 0.031, |
| "step": 4420 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 0.00018929271408068011, |
| "loss": 0.0408, |
| "step": 4430 |
| }, |
| { |
| "epoch": 0.89, |
| "step": 4431, |
| "total_flos": 1.532304306097619e+18, |
| "train_loss": 9.867495303766314e-06, |
| "train_runtime": 4.4906, |
| "train_samples_per_second": 15784.097, |
| "train_steps_per_second": 986.506 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 4430, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 10, |
| "total_flos": 1.532304306097619e+18, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|