| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.17394329448599757, | |
| "global_step": 3000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 1.4999999999999999e-05, | |
| "loss": 5.797, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 4.2e-05, | |
| "loss": 4.6838, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 7.199999999999999e-05, | |
| "loss": 5.6319, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 0.000102, | |
| "loss": 5.0822, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 0.000129, | |
| "loss": 4.2645, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 0.000159, | |
| "loss": 4.4867, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 0.00018899999999999999, | |
| "loss": 4.321, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 0.00021899999999999998, | |
| "loss": 3.9697, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.000249, | |
| "loss": 3.8873, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.000279, | |
| "loss": 3.9087, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.0002999947786737831, | |
| "loss": 3.7777, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.0002999773742530603, | |
| "loss": 4.0537, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.00029995996983233736, | |
| "loss": 3.8181, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.00029994256541161454, | |
| "loss": 3.7261, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.00029992516099089166, | |
| "loss": 4.0132, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.0002999077565701688, | |
| "loss": 3.7437, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.0002998903521494459, | |
| "loss": 3.5736, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.00029987294772872305, | |
| "loss": 3.8578, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.00029985554330800023, | |
| "loss": 3.5236, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.00029983813888727736, | |
| "loss": 3.8339, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.0002998207344665545, | |
| "loss": 3.8672, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.0002998033300458316, | |
| "loss": 3.4447, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.00029978592562510874, | |
| "loss": 3.3767, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.0002997685212043859, | |
| "loss": 3.8753, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.000299751116783663, | |
| "loss": 3.825, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.0002997337123629402, | |
| "loss": 3.7691, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.0002997163079422173, | |
| "loss": 3.7529, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.00029969890352149443, | |
| "loss": 3.7102, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.00029968149910077155, | |
| "loss": 3.6104, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.0002996640946800487, | |
| "loss": 3.7396, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.00029964669025932586, | |
| "loss": 3.5363, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.000299629285838603, | |
| "loss": 3.7725, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.0002996118814178801, | |
| "loss": 3.728, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.00029959447699715725, | |
| "loss": 3.333, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.00029957707257643437, | |
| "loss": 3.7651, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.00029955966815571155, | |
| "loss": 3.6743, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.00029954226373498863, | |
| "loss": 3.6236, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.0002995248593142658, | |
| "loss": 3.837, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.00029950745489354294, | |
| "loss": 3.8097, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.00029949005047282006, | |
| "loss": 3.886, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.0002994726460520972, | |
| "loss": 3.7826, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.0002994552416313743, | |
| "loss": 3.6854, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.0002994378372106515, | |
| "loss": 3.4106, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.00029942043278992857, | |
| "loss": 3.7991, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.00029940302836920575, | |
| "loss": 3.5353, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.0002993856239484829, | |
| "loss": 3.7661, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.00029936821952776, | |
| "loss": 3.43, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.00029935081510703714, | |
| "loss": 3.5965, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.00029933341068631426, | |
| "loss": 3.6576, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.00029931600626559144, | |
| "loss": 3.6634, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.00029929860184486857, | |
| "loss": 3.5374, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.0002992811974241457, | |
| "loss": 3.6912, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.0002992637930034228, | |
| "loss": 3.6375, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.00029924638858269995, | |
| "loss": 3.3367, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.00029922898416197714, | |
| "loss": 3.7179, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.00029921157974125426, | |
| "loss": 3.5059, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.0002991941753205314, | |
| "loss": 3.2627, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.0002991767708998085, | |
| "loss": 3.5826, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.0002991593664790857, | |
| "loss": 3.5854, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.00029914196205836277, | |
| "loss": 3.6205, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.00029912455763763995, | |
| "loss": 3.3627, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.0002991071532169171, | |
| "loss": 3.7299, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.0002990897487961942, | |
| "loss": 3.4119, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.00029907234437547133, | |
| "loss": 3.6321, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.00029905493995474846, | |
| "loss": 3.4665, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.00029903753553402564, | |
| "loss": 3.3959, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.00029902013111330277, | |
| "loss": 3.4348, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.0002990027266925799, | |
| "loss": 3.6478, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.000298985322271857, | |
| "loss": 3.5589, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.00029896791785113415, | |
| "loss": 3.4357, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.00029895051343041133, | |
| "loss": 3.5833, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.0002989331090096884, | |
| "loss": 3.4633, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.0002989157045889656, | |
| "loss": 3.4848, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.0002988983001682427, | |
| "loss": 3.1688, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.00029888089574751984, | |
| "loss": 3.7193, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.00029886349132679697, | |
| "loss": 3.5895, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.0002988460869060741, | |
| "loss": 3.5518, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.0002988286824853513, | |
| "loss": 3.5631, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.0002988112780646284, | |
| "loss": 3.5545, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.00029879387364390553, | |
| "loss": 3.5699, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.00029877646922318266, | |
| "loss": 3.5634, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.0002987590648024598, | |
| "loss": 3.607, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.00029874166038173697, | |
| "loss": 3.3944, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.00029872425596101404, | |
| "loss": 3.5578, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.0002987068515402912, | |
| "loss": 3.3392, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.00029868944711956835, | |
| "loss": 3.5491, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.0002986720426988455, | |
| "loss": 3.4634, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.0002986546382781226, | |
| "loss": 3.481, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.00029863723385739973, | |
| "loss": 3.4969, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.0002986198294366769, | |
| "loss": 3.4256, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.00029860242501595404, | |
| "loss": 3.3778, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.00029858502059523117, | |
| "loss": 3.291, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.0002985676161745083, | |
| "loss": 3.5129, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.0002985502117537854, | |
| "loss": 3.5895, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.0002985328073330626, | |
| "loss": 3.4668, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.0002985154029123397, | |
| "loss": 3.4919, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.00029849799849161686, | |
| "loss": 3.5387, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.000298480594070894, | |
| "loss": 3.5237, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.0002984631896501711, | |
| "loss": 3.5108, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.00029844578522944824, | |
| "loss": 3.6527, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.00029842838080872537, | |
| "loss": 3.2056, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.00029841097638800255, | |
| "loss": 3.4018, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.0002983935719672797, | |
| "loss": 3.4554, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.0002983761675465568, | |
| "loss": 3.4236, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.00029835876312583393, | |
| "loss": 3.3244, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.00029834135870511106, | |
| "loss": 3.4164, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.00029832395428438824, | |
| "loss": 3.0626, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.0002983065498636653, | |
| "loss": 3.7244, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.0002982891454429425, | |
| "loss": 3.4565, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.0002982717410222196, | |
| "loss": 3.5781, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.00029825433660149675, | |
| "loss": 3.2126, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.0002982369321807739, | |
| "loss": 3.6434, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.000298219527760051, | |
| "loss": 3.4129, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.0002982021233393282, | |
| "loss": 3.2887, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.0002981847189186053, | |
| "loss": 3.5407, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.00029816731449788244, | |
| "loss": 3.5717, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.00029814991007715957, | |
| "loss": 3.6888, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.0002981325056564367, | |
| "loss": 3.6167, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.0002981151012357139, | |
| "loss": 3.3514, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.00029809769681499095, | |
| "loss": 3.4163, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.00029808029239426813, | |
| "loss": 3.5967, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.00029806288797354526, | |
| "loss": 3.4587, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.0002980454835528224, | |
| "loss": 3.3907, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.0002980280791320995, | |
| "loss": 3.6969, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.00029801067471137664, | |
| "loss": 3.2609, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.0002979932702906538, | |
| "loss": 3.5595, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.00029797586586993095, | |
| "loss": 3.3332, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.0002979584614492081, | |
| "loss": 3.4112, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.0002979410570284852, | |
| "loss": 3.4097, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.00029792365260776233, | |
| "loss": 3.619, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.0002979062481870395, | |
| "loss": 3.2694, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.0002978888437663166, | |
| "loss": 3.6746, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.00029787143934559377, | |
| "loss": 3.3979, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.0002978540349248709, | |
| "loss": 3.5019, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.000297836630504148, | |
| "loss": 3.2628, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.00029781922608342515, | |
| "loss": 3.2965, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.0002978018216627023, | |
| "loss": 3.6118, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.00029778441724197946, | |
| "loss": 3.5439, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.00029776701282125653, | |
| "loss": 3.1755, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.0002977496084005337, | |
| "loss": 3.6912, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.00029773220397981084, | |
| "loss": 3.2572, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.00029771479955908797, | |
| "loss": 3.364, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.0002976973951383651, | |
| "loss": 3.4011, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.0002976799907176422, | |
| "loss": 3.5433, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.0002976625862969194, | |
| "loss": 3.4217, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.00029764518187619653, | |
| "loss": 3.331, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.00029762777745547366, | |
| "loss": 3.1582, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.0002976103730347508, | |
| "loss": 3.3783, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.0002975929686140279, | |
| "loss": 3.5096, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.0002975755641933051, | |
| "loss": 3.3047, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.00029755815977258217, | |
| "loss": 3.3239, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.00029754075535185935, | |
| "loss": 3.3897, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.0002975233509311365, | |
| "loss": 2.9298, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.0002975059465104136, | |
| "loss": 3.2706, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.00029748854208969073, | |
| "loss": 3.6979, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.00029747113766896786, | |
| "loss": 3.3929, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.00029745373324824504, | |
| "loss": 3.3763, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.00029743632882752217, | |
| "loss": 3.4914, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.0002974189244067993, | |
| "loss": 3.4507, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.0002974015199860764, | |
| "loss": 3.4506, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.00029738411556535355, | |
| "loss": 3.2699, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.00029736671114463073, | |
| "loss": 3.5554, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.00029734930672390786, | |
| "loss": 3.677, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.000297331902303185, | |
| "loss": 3.3466, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.0002973144978824621, | |
| "loss": 3.4125, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.00029729709346173924, | |
| "loss": 3.4123, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.00029727968904101637, | |
| "loss": 3.1869, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.00029726228462029355, | |
| "loss": 3.5066, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.0002972448801995707, | |
| "loss": 3.68, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.0002972274757788478, | |
| "loss": 3.3881, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.00029721007135812493, | |
| "loss": 3.5452, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.00029719266693740206, | |
| "loss": 3.4605, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.00029717526251667924, | |
| "loss": 3.5601, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.00029715785809595637, | |
| "loss": 3.3588, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.0002971404536752335, | |
| "loss": 3.3235, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.0002971230492545106, | |
| "loss": 3.6218, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.00029710564483378775, | |
| "loss": 3.4837, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.00029708824041306493, | |
| "loss": 3.402, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.000297070835992342, | |
| "loss": 3.2912, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.0002970534315716192, | |
| "loss": 3.704, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.0002970360271508963, | |
| "loss": 3.6123, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.00029701862273017344, | |
| "loss": 3.3626, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.00029700121830945057, | |
| "loss": 3.2765, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.0002969838138887277, | |
| "loss": 3.3083, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.0002969664094680049, | |
| "loss": 3.8005, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.000296949005047282, | |
| "loss": 3.6119, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.00029693160062655913, | |
| "loss": 3.2547, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.00029691419620583626, | |
| "loss": 3.4915, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.0002968967917851134, | |
| "loss": 3.4441, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.00029687938736439057, | |
| "loss": 3.6537, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.00029686198294366764, | |
| "loss": 3.2625, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.0002968445785229448, | |
| "loss": 3.411, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.00029682717410222195, | |
| "loss": 2.997, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.0002968097696814991, | |
| "loss": 3.3158, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.0002967923652607762, | |
| "loss": 3.0668, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.00029677496084005333, | |
| "loss": 3.3956, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.0002967575564193305, | |
| "loss": 3.5991, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.00029674015199860764, | |
| "loss": 3.5781, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.00029672274757788477, | |
| "loss": 3.5981, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.0002967053431571619, | |
| "loss": 3.6671, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.000296687938736439, | |
| "loss": 2.9026, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.0002966705343157162, | |
| "loss": 3.2852, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.0002966531298949933, | |
| "loss": 3.439, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.00029663572547427046, | |
| "loss": 3.3326, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.0002966183210535476, | |
| "loss": 3.3541, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.0002966009166328247, | |
| "loss": 3.3194, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.00029658351221210184, | |
| "loss": 3.5999, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.00029656610779137896, | |
| "loss": 3.1402, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.00029654870337065615, | |
| "loss": 3.0779, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.0002965312989499333, | |
| "loss": 3.4571, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.0002965138945292104, | |
| "loss": 3.2815, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.00029649649010848753, | |
| "loss": 3.2003, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.00029647908568776466, | |
| "loss": 3.5262, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.00029646168126704184, | |
| "loss": 3.1835, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.0002964442768463189, | |
| "loss": 3.0947, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.0002964268724255961, | |
| "loss": 3.5398, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.0002964094680048732, | |
| "loss": 3.319, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.00029639206358415035, | |
| "loss": 3.3722, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.0002963746591634275, | |
| "loss": 3.3396, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.0002963572547427046, | |
| "loss": 3.2734, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.0002963398503219818, | |
| "loss": 3.3672, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.0002963224459012589, | |
| "loss": 3.3559, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.00029630504148053604, | |
| "loss": 3.3601, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.00029628763705981316, | |
| "loss": 3.6175, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.0002962702326390903, | |
| "loss": 3.2681, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.00029625282821836747, | |
| "loss": 3.3181, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.00029623542379764455, | |
| "loss": 3.5367, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.00029621801937692173, | |
| "loss": 3.2207, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.00029620061495619885, | |
| "loss": 3.2358, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.000296183210535476, | |
| "loss": 3.4455, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.0002961658061147531, | |
| "loss": 3.4017, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.00029614840169403024, | |
| "loss": 3.4288, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.0002961309972733074, | |
| "loss": 3.3411, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.00029611359285258455, | |
| "loss": 3.1286, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.00029609618843186167, | |
| "loss": 2.9427, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.0002960787840111388, | |
| "loss": 3.4088, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.0002960613795904159, | |
| "loss": 3.3628, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.00029604397516969305, | |
| "loss": 3.4993, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.0002960265707489702, | |
| "loss": 3.6364, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.00029600916632824736, | |
| "loss": 3.482, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.0002959917619075245, | |
| "loss": 3.4856, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.0002959743574868016, | |
| "loss": 3.4327, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.00029595695306607874, | |
| "loss": 3.4458, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.00029593954864535587, | |
| "loss": 3.3191, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.00029592214422463305, | |
| "loss": 3.4966, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.0002959047398039101, | |
| "loss": 3.1979, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.0002958873353831873, | |
| "loss": 3.3932, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.00029586993096246444, | |
| "loss": 3.1029, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.00029585252654174156, | |
| "loss": 3.2549, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.0002958351221210187, | |
| "loss": 3.3986, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.0002958177177002958, | |
| "loss": 3.22, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.000295800313279573, | |
| "loss": 3.0488, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.0002957829088588501, | |
| "loss": 3.5085, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.00029576550443812725, | |
| "loss": 3.2679, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.0002957481000174044, | |
| "loss": 3.4828, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.0002957306955966815, | |
| "loss": 3.4081, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.0002957132911759587, | |
| "loss": 3.3543, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.00029569588675523576, | |
| "loss": 3.6927, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.00029567848233451294, | |
| "loss": 3.215, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.00029566107791379007, | |
| "loss": 3.502, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.0002956436734930672, | |
| "loss": 3.4641, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.0002956262690723443, | |
| "loss": 3.2631, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.00029560886465162145, | |
| "loss": 3.3747, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.00029559146023089863, | |
| "loss": 3.3669, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.00029557405581017576, | |
| "loss": 2.8722, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.0002955566513894529, | |
| "loss": 3.4885, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.00029553924696873, | |
| "loss": 3.5238, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.00029552184254800714, | |
| "loss": 3.2318, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.0002955044381272843, | |
| "loss": 3.4054, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.00029548703370656145, | |
| "loss": 3.2475, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.0002954696292858386, | |
| "loss": 3.3495, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.0002954522248651157, | |
| "loss": 3.5605, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.00029543482044439283, | |
| "loss": 3.0016, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.00029541741602366996, | |
| "loss": 3.1471, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.00029540001160294714, | |
| "loss": 3.5481, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.00029538260718222427, | |
| "loss": 3.4854, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.0002953652027615014, | |
| "loss": 3.535, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.0002953477983407785, | |
| "loss": 3.2613, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.00029533039392005565, | |
| "loss": 3.3677, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.00029531298949933283, | |
| "loss": 3.2767, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.00029529558507860996, | |
| "loss": 3.4752, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.0002952781806578871, | |
| "loss": 3.6749, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.0002952607762371642, | |
| "loss": 3.2192, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.00029524337181644134, | |
| "loss": 3.2643, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.0002952259673957185, | |
| "loss": 3.5323, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.0002952085629749956, | |
| "loss": 3.1769, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.0002951911585542728, | |
| "loss": 3.4551, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.0002951737541335499, | |
| "loss": 3.387, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.00029515634971282703, | |
| "loss": 3.2243, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.00029513894529210416, | |
| "loss": 2.9179, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.0002951215408713813, | |
| "loss": 3.2905, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.00029510413645065847, | |
| "loss": 3.262, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.0002950867320299356, | |
| "loss": 3.2404, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.0002950693276092127, | |
| "loss": 3.577, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.00029505192318848985, | |
| "loss": 3.5627, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.000295034518767767, | |
| "loss": 3.5328, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.00029501711434704416, | |
| "loss": 3.5149, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.00029499970992632123, | |
| "loss": 3.2421, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.0002949823055055984, | |
| "loss": 3.0618, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.00029496490108487554, | |
| "loss": 3.2825, | |
| "step": 3000 | |
| } | |
| ], | |
| "max_steps": 172470, | |
| "num_train_epochs": 10, | |
| "total_flos": 6.54510518697984e+16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |