| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 2.996599690880989, | |
| "eval_steps": 500, | |
| "global_step": 2424, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.012364760432766615, | |
| "grad_norm": 0.04910803958773613, | |
| "learning_rate": 0.00029876237623762373, | |
| "loss": 1.0084, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.02472952086553323, | |
| "grad_norm": 0.037698596715927124, | |
| "learning_rate": 0.0002975247524752475, | |
| "loss": 0.9609, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.03709428129829984, | |
| "grad_norm": 0.03782161325216293, | |
| "learning_rate": 0.00029628712871287126, | |
| "loss": 0.9033, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.04945904173106646, | |
| "grad_norm": 0.04191258177161217, | |
| "learning_rate": 0.000295049504950495, | |
| "loss": 0.9154, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.061823802163833076, | |
| "grad_norm": 0.0391441248357296, | |
| "learning_rate": 0.0002938118811881188, | |
| "loss": 0.9001, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.07418856259659969, | |
| "grad_norm": 0.04036989435553551, | |
| "learning_rate": 0.00029257425742574254, | |
| "loss": 0.9069, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.0865533230293663, | |
| "grad_norm": 0.036793895065784454, | |
| "learning_rate": 0.0002913366336633663, | |
| "loss": 0.8903, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.09891808346213292, | |
| "grad_norm": 0.04941694810986519, | |
| "learning_rate": 0.00029009900990099006, | |
| "loss": 0.928, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.11128284389489954, | |
| "grad_norm": 0.03952586278319359, | |
| "learning_rate": 0.0002888613861386138, | |
| "loss": 0.9121, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.12364760432766615, | |
| "grad_norm": 0.04417801648378372, | |
| "learning_rate": 0.0002876237623762376, | |
| "loss": 0.8918, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.13601236476043277, | |
| "grad_norm": 0.03843015059828758, | |
| "learning_rate": 0.00028638613861386135, | |
| "loss": 0.8988, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.14837712519319937, | |
| "grad_norm": 0.035807665437459946, | |
| "learning_rate": 0.0002851485148514851, | |
| "loss": 0.899, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.160741885625966, | |
| "grad_norm": 0.041015319526195526, | |
| "learning_rate": 0.00028391089108910887, | |
| "loss": 0.8937, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.1731066460587326, | |
| "grad_norm": 0.04108859598636627, | |
| "learning_rate": 0.00028267326732673263, | |
| "loss": 0.894, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.18547140649149924, | |
| "grad_norm": 0.03874868154525757, | |
| "learning_rate": 0.0002814356435643564, | |
| "loss": 0.9088, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.19783616692426584, | |
| "grad_norm": 0.03931848704814911, | |
| "learning_rate": 0.00028019801980198015, | |
| "loss": 0.9079, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.21020092735703247, | |
| "grad_norm": 0.04638601467013359, | |
| "learning_rate": 0.0002789603960396039, | |
| "loss": 0.9057, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.22256568778979907, | |
| "grad_norm": 0.05649528279900551, | |
| "learning_rate": 0.00027772277227722773, | |
| "loss": 0.872, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.23493044822256567, | |
| "grad_norm": 0.043013814836740494, | |
| "learning_rate": 0.00027648514851485144, | |
| "loss": 0.9102, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.2472952086553323, | |
| "grad_norm": 0.04763510450720787, | |
| "learning_rate": 0.0002752475247524752, | |
| "loss": 0.8894, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.2596599690880989, | |
| "grad_norm": 0.03904448449611664, | |
| "learning_rate": 0.000274009900990099, | |
| "loss": 0.8856, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.27202472952086554, | |
| "grad_norm": 0.04328664019703865, | |
| "learning_rate": 0.0002727722772277227, | |
| "loss": 0.8967, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.28438948995363217, | |
| "grad_norm": 0.04718885198235512, | |
| "learning_rate": 0.0002715346534653465, | |
| "loss": 0.892, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.29675425038639874, | |
| "grad_norm": 0.048305947333574295, | |
| "learning_rate": 0.0002702970297029703, | |
| "loss": 0.8896, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.3091190108191654, | |
| "grad_norm": 0.04656008258461952, | |
| "learning_rate": 0.000269059405940594, | |
| "loss": 0.8942, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.321483771251932, | |
| "grad_norm": 0.041436970233917236, | |
| "learning_rate": 0.00026782178217821777, | |
| "loss": 0.892, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.33384853168469864, | |
| "grad_norm": 0.043510112911462784, | |
| "learning_rate": 0.0002665841584158416, | |
| "loss": 0.8813, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.3462132921174652, | |
| "grad_norm": 0.039642177522182465, | |
| "learning_rate": 0.00026534653465346534, | |
| "loss": 0.8884, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.35857805255023184, | |
| "grad_norm": 0.043365489691495895, | |
| "learning_rate": 0.00026410891089108905, | |
| "loss": 0.8963, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.37094281298299847, | |
| "grad_norm": 0.04440496489405632, | |
| "learning_rate": 0.00026287128712871287, | |
| "loss": 0.8865, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.38330757341576505, | |
| "grad_norm": 0.04222091659903526, | |
| "learning_rate": 0.00026163366336633663, | |
| "loss": 0.8935, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.3956723338485317, | |
| "grad_norm": 0.04118286073207855, | |
| "learning_rate": 0.00026039603960396033, | |
| "loss": 0.8742, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.4080370942812983, | |
| "grad_norm": 0.04324512556195259, | |
| "learning_rate": 0.00025915841584158415, | |
| "loss": 0.8918, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.42040185471406494, | |
| "grad_norm": 0.04111215099692345, | |
| "learning_rate": 0.0002579207920792079, | |
| "loss": 0.8795, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.4327666151468315, | |
| "grad_norm": 0.04449688270688057, | |
| "learning_rate": 0.0002566831683168316, | |
| "loss": 0.8889, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.44513137557959814, | |
| "grad_norm": 0.04658028110861778, | |
| "learning_rate": 0.00025544554455445543, | |
| "loss": 0.8798, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.4574961360123648, | |
| "grad_norm": 0.037669096142053604, | |
| "learning_rate": 0.0002542079207920792, | |
| "loss": 0.8809, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.46986089644513135, | |
| "grad_norm": 0.04158737137913704, | |
| "learning_rate": 0.00025297029702970296, | |
| "loss": 0.8922, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.482225656877898, | |
| "grad_norm": 0.047567520290613174, | |
| "learning_rate": 0.0002517326732673267, | |
| "loss": 0.8949, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.4945904173106646, | |
| "grad_norm": 0.057194869965314865, | |
| "learning_rate": 0.0002504950495049505, | |
| "loss": 0.8774, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.5069551777434312, | |
| "grad_norm": 0.04181217402219772, | |
| "learning_rate": 0.00024925742574257424, | |
| "loss": 0.8907, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.5193199381761978, | |
| "grad_norm": 0.045876242220401764, | |
| "learning_rate": 0.000248019801980198, | |
| "loss": 0.8841, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.5316846986089645, | |
| "grad_norm": 0.041932158172130585, | |
| "learning_rate": 0.00024678217821782176, | |
| "loss": 0.8582, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.5440494590417311, | |
| "grad_norm": 0.044740475714206696, | |
| "learning_rate": 0.0002455445544554455, | |
| "loss": 0.9204, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.5564142194744977, | |
| "grad_norm": 0.04608389362692833, | |
| "learning_rate": 0.0002443069306930693, | |
| "loss": 0.8859, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.5687789799072643, | |
| "grad_norm": 0.04750910773873329, | |
| "learning_rate": 0.00024306930693069305, | |
| "loss": 0.8786, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.5811437403400309, | |
| "grad_norm": 0.0407898910343647, | |
| "learning_rate": 0.0002418316831683168, | |
| "loss": 0.8731, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.5935085007727975, | |
| "grad_norm": 0.047790151089429855, | |
| "learning_rate": 0.0002405940594059406, | |
| "loss": 0.8984, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.6058732612055642, | |
| "grad_norm": 0.04182444140315056, | |
| "learning_rate": 0.00023935643564356433, | |
| "loss": 0.8865, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.6182380216383307, | |
| "grad_norm": 0.04374885559082031, | |
| "learning_rate": 0.0002381188118811881, | |
| "loss": 0.8736, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.6306027820710973, | |
| "grad_norm": 0.04540247470140457, | |
| "learning_rate": 0.00023688118811881188, | |
| "loss": 0.8977, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.642967542503864, | |
| "grad_norm": 0.039125751703977585, | |
| "learning_rate": 0.00023564356435643561, | |
| "loss": 0.8955, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.6553323029366306, | |
| "grad_norm": 0.04842868447303772, | |
| "learning_rate": 0.00023440594059405938, | |
| "loss": 0.8979, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.6676970633693973, | |
| "grad_norm": 0.04414287582039833, | |
| "learning_rate": 0.00023316831683168316, | |
| "loss": 0.9063, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.6800618238021638, | |
| "grad_norm": 0.05018250271677971, | |
| "learning_rate": 0.0002319306930693069, | |
| "loss": 0.894, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.6924265842349304, | |
| "grad_norm": 0.04726792126893997, | |
| "learning_rate": 0.00023069306930693066, | |
| "loss": 0.8716, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.7047913446676971, | |
| "grad_norm": 0.049401551485061646, | |
| "learning_rate": 0.00022945544554455445, | |
| "loss": 0.9016, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.7171561051004637, | |
| "grad_norm": 0.049783241003751755, | |
| "learning_rate": 0.0002282178217821782, | |
| "loss": 0.8774, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.7295208655332303, | |
| "grad_norm": 0.04755168408155441, | |
| "learning_rate": 0.00022698019801980194, | |
| "loss": 0.8814, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.7418856259659969, | |
| "grad_norm": 0.04885553568601608, | |
| "learning_rate": 0.00022574257425742573, | |
| "loss": 0.8744, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.7542503863987635, | |
| "grad_norm": 0.04771718755364418, | |
| "learning_rate": 0.0002245049504950495, | |
| "loss": 0.9001, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.7666151468315301, | |
| "grad_norm": 0.04642605781555176, | |
| "learning_rate": 0.00022326732673267323, | |
| "loss": 0.88, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.7789799072642968, | |
| "grad_norm": 0.047350749373435974, | |
| "learning_rate": 0.00022202970297029702, | |
| "loss": 0.8928, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.7913446676970634, | |
| "grad_norm": 0.04467844218015671, | |
| "learning_rate": 0.00022079207920792078, | |
| "loss": 0.885, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.80370942812983, | |
| "grad_norm": 0.04457986727356911, | |
| "learning_rate": 0.0002195544554455445, | |
| "loss": 0.888, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.8160741885625966, | |
| "grad_norm": 0.04410697519779205, | |
| "learning_rate": 0.0002183168316831683, | |
| "loss": 0.8888, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.8284389489953632, | |
| "grad_norm": 0.0475030243396759, | |
| "learning_rate": 0.00021707920792079206, | |
| "loss": 0.9009, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.8408037094281299, | |
| "grad_norm": 0.043028101325035095, | |
| "learning_rate": 0.00021584158415841585, | |
| "loss": 0.8735, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.8531684698608965, | |
| "grad_norm": 0.04463913291692734, | |
| "learning_rate": 0.00021460396039603958, | |
| "loss": 0.8742, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.865533230293663, | |
| "grad_norm": 0.04648848995566368, | |
| "learning_rate": 0.00021336633663366334, | |
| "loss": 0.8899, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.8778979907264297, | |
| "grad_norm": 0.04463621601462364, | |
| "learning_rate": 0.00021212871287128713, | |
| "loss": 0.8887, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.8902627511591963, | |
| "grad_norm": 0.04241452366113663, | |
| "learning_rate": 0.00021089108910891087, | |
| "loss": 0.8749, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.9026275115919629, | |
| "grad_norm": 0.04464114084839821, | |
| "learning_rate": 0.00020965346534653463, | |
| "loss": 0.8774, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.9149922720247295, | |
| "grad_norm": 0.04345027729868889, | |
| "learning_rate": 0.00020841584158415842, | |
| "loss": 0.8753, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.9273570324574961, | |
| "grad_norm": 0.048532094806432724, | |
| "learning_rate": 0.00020717821782178215, | |
| "loss": 0.8946, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.9397217928902627, | |
| "grad_norm": 0.04126739129424095, | |
| "learning_rate": 0.0002059405940594059, | |
| "loss": 0.903, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.9520865533230294, | |
| "grad_norm": 0.04423375427722931, | |
| "learning_rate": 0.0002047029702970297, | |
| "loss": 0.8843, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.964451313755796, | |
| "grad_norm": 0.04136930778622627, | |
| "learning_rate": 0.00020346534653465346, | |
| "loss": 0.8757, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.9768160741885626, | |
| "grad_norm": 0.05331163853406906, | |
| "learning_rate": 0.0002022277227722772, | |
| "loss": 0.8842, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.9891808346213292, | |
| "grad_norm": 0.04790889099240303, | |
| "learning_rate": 0.00020099009900990098, | |
| "loss": 0.8814, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.0012364760432766, | |
| "grad_norm": 0.05177275091409683, | |
| "learning_rate": 0.00019975247524752475, | |
| "loss": 0.8858, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 1.0136012364760432, | |
| "grad_norm": 0.0411980040371418, | |
| "learning_rate": 0.00019851485148514848, | |
| "loss": 0.8461, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 1.02596599690881, | |
| "grad_norm": 0.04518349468708038, | |
| "learning_rate": 0.00019727722772277227, | |
| "loss": 0.8547, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 1.0383307573415765, | |
| "grad_norm": 0.047048419713974, | |
| "learning_rate": 0.00019603960396039603, | |
| "loss": 0.8502, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 1.0506955177743431, | |
| "grad_norm": 0.04998902231454849, | |
| "learning_rate": 0.00019480198019801976, | |
| "loss": 0.8584, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 1.0630602782071097, | |
| "grad_norm": 0.05004483088850975, | |
| "learning_rate": 0.00019356435643564355, | |
| "loss": 0.8787, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 1.0754250386398763, | |
| "grad_norm": 0.0483798012137413, | |
| "learning_rate": 0.0001923267326732673, | |
| "loss": 0.8732, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 1.087789799072643, | |
| "grad_norm": 0.048114124685525894, | |
| "learning_rate": 0.00019108910891089107, | |
| "loss": 0.8773, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 1.1001545595054096, | |
| "grad_norm": 0.04553611949086189, | |
| "learning_rate": 0.00018985148514851484, | |
| "loss": 0.8646, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 1.1125193199381762, | |
| "grad_norm": 0.052288319915533066, | |
| "learning_rate": 0.0001886138613861386, | |
| "loss": 0.8592, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 1.1248840803709428, | |
| "grad_norm": 0.05070117861032486, | |
| "learning_rate": 0.00018737623762376236, | |
| "loss": 0.8565, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 1.1372488408037094, | |
| "grad_norm": 0.049008361995220184, | |
| "learning_rate": 0.00018613861386138612, | |
| "loss": 0.8783, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 1.1496136012364762, | |
| "grad_norm": 0.04916449636220932, | |
| "learning_rate": 0.00018490099009900988, | |
| "loss": 0.8668, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 1.1619783616692427, | |
| "grad_norm": 0.05646826699376106, | |
| "learning_rate": 0.00018366336633663364, | |
| "loss": 0.858, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 1.1743431221020093, | |
| "grad_norm": 0.05039024353027344, | |
| "learning_rate": 0.0001824257425742574, | |
| "loss": 0.8687, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 1.1867078825347759, | |
| "grad_norm": 0.052257779985666275, | |
| "learning_rate": 0.00018118811881188116, | |
| "loss": 0.8731, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 1.1990726429675425, | |
| "grad_norm": 0.04960246384143829, | |
| "learning_rate": 0.00017995049504950493, | |
| "loss": 0.8346, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 1.211437403400309, | |
| "grad_norm": 0.05193152651190758, | |
| "learning_rate": 0.00017871287128712871, | |
| "loss": 0.8656, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 1.2238021638330758, | |
| "grad_norm": 0.05180949717760086, | |
| "learning_rate": 0.00017747524752475245, | |
| "loss": 0.8542, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 1.2361669242658424, | |
| "grad_norm": 0.05225878953933716, | |
| "learning_rate": 0.0001762376237623762, | |
| "loss": 0.8628, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.248531684698609, | |
| "grad_norm": 0.05485387519001961, | |
| "learning_rate": 0.000175, | |
| "loss": 0.8746, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 1.2608964451313756, | |
| "grad_norm": 0.06754795461893082, | |
| "learning_rate": 0.00017376237623762373, | |
| "loss": 0.8702, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 1.2732612055641421, | |
| "grad_norm": 0.05525548383593559, | |
| "learning_rate": 0.00017252475247524752, | |
| "loss": 0.863, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 1.2856259659969087, | |
| "grad_norm": 0.05193280428647995, | |
| "learning_rate": 0.00017128712871287128, | |
| "loss": 0.8389, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 1.2979907264296755, | |
| "grad_norm": 0.04822159186005592, | |
| "learning_rate": 0.00017004950495049502, | |
| "loss": 0.8665, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 1.310355486862442, | |
| "grad_norm": 0.05497356876730919, | |
| "learning_rate": 0.0001688118811881188, | |
| "loss": 0.8635, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 1.3227202472952087, | |
| "grad_norm": 0.05118054896593094, | |
| "learning_rate": 0.00016757425742574257, | |
| "loss": 0.8483, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 1.3350850077279752, | |
| "grad_norm": 0.051902711391448975, | |
| "learning_rate": 0.00016633663366336633, | |
| "loss": 0.8478, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 1.3474497681607418, | |
| "grad_norm": 0.049953706562519073, | |
| "learning_rate": 0.0001650990099009901, | |
| "loss": 0.8569, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 1.3598145285935086, | |
| "grad_norm": 0.09028486907482147, | |
| "learning_rate": 0.00016386138613861385, | |
| "loss": 0.8465, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 1.3721792890262752, | |
| "grad_norm": 0.05248475819826126, | |
| "learning_rate": 0.0001626237623762376, | |
| "loss": 0.8707, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 1.3845440494590417, | |
| "grad_norm": 0.05470622703433037, | |
| "learning_rate": 0.00016138613861386137, | |
| "loss": 0.8581, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 1.3969088098918083, | |
| "grad_norm": 0.051429346203804016, | |
| "learning_rate": 0.00016014851485148513, | |
| "loss": 0.867, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 1.409273570324575, | |
| "grad_norm": 0.05353890359401703, | |
| "learning_rate": 0.0001589108910891089, | |
| "loss": 0.8489, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 1.4216383307573417, | |
| "grad_norm": 0.0630929172039032, | |
| "learning_rate": 0.00015767326732673266, | |
| "loss": 0.8575, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 1.4340030911901083, | |
| "grad_norm": 0.0524783730506897, | |
| "learning_rate": 0.00015643564356435642, | |
| "loss": 0.8527, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 1.4463678516228748, | |
| "grad_norm": 0.05413209646940231, | |
| "learning_rate": 0.00015519801980198018, | |
| "loss": 0.8786, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 1.4587326120556414, | |
| "grad_norm": 0.055751536041498184, | |
| "learning_rate": 0.00015396039603960397, | |
| "loss": 0.872, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 1.471097372488408, | |
| "grad_norm": 0.05271457880735397, | |
| "learning_rate": 0.0001527227722772277, | |
| "loss": 0.8734, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 1.4834621329211746, | |
| "grad_norm": 0.04827325418591499, | |
| "learning_rate": 0.00015148514851485146, | |
| "loss": 0.8488, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 1.4958268933539411, | |
| "grad_norm": 0.05717690661549568, | |
| "learning_rate": 0.00015024752475247525, | |
| "loss": 0.8732, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 1.508191653786708, | |
| "grad_norm": 0.055509038269519806, | |
| "learning_rate": 0.000149009900990099, | |
| "loss": 0.8675, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 1.5205564142194745, | |
| "grad_norm": 0.05562078580260277, | |
| "learning_rate": 0.00014777227722772275, | |
| "loss": 0.8644, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 1.532921174652241, | |
| "grad_norm": 0.046674925833940506, | |
| "learning_rate": 0.00014653465346534653, | |
| "loss": 0.8429, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 1.545285935085008, | |
| "grad_norm": 0.053251732140779495, | |
| "learning_rate": 0.0001452970297029703, | |
| "loss": 0.849, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 1.5576506955177742, | |
| "grad_norm": 0.05253510922193527, | |
| "learning_rate": 0.00014405940594059403, | |
| "loss": 0.8445, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 1.570015455950541, | |
| "grad_norm": 0.05021601915359497, | |
| "learning_rate": 0.00014282178217821782, | |
| "loss": 0.8668, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 1.5823802163833076, | |
| "grad_norm": 0.052446555346250534, | |
| "learning_rate": 0.00014158415841584158, | |
| "loss": 0.8733, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 1.5947449768160742, | |
| "grad_norm": 0.056364450603723526, | |
| "learning_rate": 0.00014034653465346534, | |
| "loss": 0.8823, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 1.6071097372488408, | |
| "grad_norm": 0.05288272723555565, | |
| "learning_rate": 0.0001391089108910891, | |
| "loss": 0.8678, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 1.6194744976816073, | |
| "grad_norm": 0.054042939096689224, | |
| "learning_rate": 0.00013787128712871286, | |
| "loss": 0.8439, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 1.6318392581143741, | |
| "grad_norm": 0.051554158329963684, | |
| "learning_rate": 0.00013663366336633662, | |
| "loss": 0.8514, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 1.6442040185471405, | |
| "grad_norm": 0.04892382398247719, | |
| "learning_rate": 0.00013539603960396039, | |
| "loss": 0.834, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 1.6565687789799073, | |
| "grad_norm": 0.05448554828763008, | |
| "learning_rate": 0.00013415841584158415, | |
| "loss": 0.8484, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 1.6689335394126739, | |
| "grad_norm": 0.056680306792259216, | |
| "learning_rate": 0.0001329207920792079, | |
| "loss": 0.8495, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 1.6812982998454404, | |
| "grad_norm": 0.05566761642694473, | |
| "learning_rate": 0.00013168316831683167, | |
| "loss": 0.856, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 1.6936630602782072, | |
| "grad_norm": 0.04952670633792877, | |
| "learning_rate": 0.00013044554455445543, | |
| "loss": 0.8405, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 1.7060278207109736, | |
| "grad_norm": 0.05578543245792389, | |
| "learning_rate": 0.0001292079207920792, | |
| "loss": 0.8555, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 1.7183925811437404, | |
| "grad_norm": 0.05533617362380028, | |
| "learning_rate": 0.00012797029702970295, | |
| "loss": 0.8558, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 1.730757341576507, | |
| "grad_norm": 0.05991559103131294, | |
| "learning_rate": 0.00012673267326732672, | |
| "loss": 0.8636, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 1.7431221020092735, | |
| "grad_norm": 0.054518427699804306, | |
| "learning_rate": 0.00012549504950495048, | |
| "loss": 0.8471, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 1.7554868624420403, | |
| "grad_norm": 0.04764275252819061, | |
| "learning_rate": 0.00012425742574257426, | |
| "loss": 0.8449, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 1.7678516228748067, | |
| "grad_norm": 0.058475952595472336, | |
| "learning_rate": 0.000123019801980198, | |
| "loss": 0.8523, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 1.7802163833075735, | |
| "grad_norm": 0.05991446226835251, | |
| "learning_rate": 0.00012178217821782177, | |
| "loss": 0.8564, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 1.79258114374034, | |
| "grad_norm": 0.0623490996658802, | |
| "learning_rate": 0.00012054455445544554, | |
| "loss": 0.8404, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 1.8049459041731066, | |
| "grad_norm": 0.04905753955245018, | |
| "learning_rate": 0.0001193069306930693, | |
| "loss": 0.8474, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 1.8173106646058734, | |
| "grad_norm": 0.05426807701587677, | |
| "learning_rate": 0.00011806930693069306, | |
| "loss": 0.8766, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 1.8296754250386398, | |
| "grad_norm": 0.0476132333278656, | |
| "learning_rate": 0.00011683168316831682, | |
| "loss": 0.815, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 1.8420401854714066, | |
| "grad_norm": 0.05849111080169678, | |
| "learning_rate": 0.0001155940594059406, | |
| "loss": 0.854, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 1.8544049459041732, | |
| "grad_norm": 0.05493124946951866, | |
| "learning_rate": 0.00011435643564356434, | |
| "loss": 0.8563, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 1.8667697063369397, | |
| "grad_norm": 0.05999801307916641, | |
| "learning_rate": 0.0001131188118811881, | |
| "loss": 0.8498, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 1.8791344667697063, | |
| "grad_norm": 0.058151423931121826, | |
| "learning_rate": 0.00011188118811881188, | |
| "loss": 0.8645, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 1.8914992272024729, | |
| "grad_norm": 0.05524227395653725, | |
| "learning_rate": 0.00011064356435643564, | |
| "loss": 0.8624, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 1.9038639876352397, | |
| "grad_norm": 0.06369632482528687, | |
| "learning_rate": 0.00010940594059405939, | |
| "loss": 0.8695, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 1.916228748068006, | |
| "grad_norm": 0.057092998176813126, | |
| "learning_rate": 0.00010816831683168316, | |
| "loss": 0.844, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 1.9285935085007728, | |
| "grad_norm": 0.05554778128862381, | |
| "learning_rate": 0.00010693069306930692, | |
| "loss": 0.8543, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 1.9409582689335394, | |
| "grad_norm": 0.05691225454211235, | |
| "learning_rate": 0.00010569306930693068, | |
| "loss": 0.8714, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 1.953323029366306, | |
| "grad_norm": 0.0564524307847023, | |
| "learning_rate": 0.00010445544554455445, | |
| "loss": 0.8574, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 1.9656877897990728, | |
| "grad_norm": 0.0588836595416069, | |
| "learning_rate": 0.0001032178217821782, | |
| "loss": 0.8558, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 1.9780525502318391, | |
| "grad_norm": 0.05634515732526779, | |
| "learning_rate": 0.00010198019801980197, | |
| "loss": 0.8444, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 1.990417310664606, | |
| "grad_norm": 0.055482737720012665, | |
| "learning_rate": 0.00010074257425742573, | |
| "loss": 0.8672, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 2.002472952086553, | |
| "grad_norm": 0.054257094860076904, | |
| "learning_rate": 9.95049504950495e-05, | |
| "loss": 0.8579, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 2.01483771251932, | |
| "grad_norm": 0.05709832161664963, | |
| "learning_rate": 9.826732673267325e-05, | |
| "loss": 0.8414, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 2.0272024729520863, | |
| "grad_norm": 0.05785168707370758, | |
| "learning_rate": 9.702970297029701e-05, | |
| "loss": 0.8223, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 2.039567233384853, | |
| "grad_norm": 0.060052480548620224, | |
| "learning_rate": 9.579207920792079e-05, | |
| "loss": 0.8371, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 2.05193199381762, | |
| "grad_norm": 0.06388446688652039, | |
| "learning_rate": 9.455445544554454e-05, | |
| "loss": 0.8308, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 2.0642967542503863, | |
| "grad_norm": 0.05495399236679077, | |
| "learning_rate": 9.331683168316831e-05, | |
| "loss": 0.8582, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 2.076661514683153, | |
| "grad_norm": 0.0544477179646492, | |
| "learning_rate": 9.207920792079207e-05, | |
| "loss": 0.8383, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 2.0890262751159194, | |
| "grad_norm": 0.06450890749692917, | |
| "learning_rate": 9.084158415841582e-05, | |
| "loss": 0.8359, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 2.1013910355486862, | |
| "grad_norm": 0.054119642823934555, | |
| "learning_rate": 8.96039603960396e-05, | |
| "loss": 0.8363, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 2.113755795981453, | |
| "grad_norm": 0.05726737529039383, | |
| "learning_rate": 8.836633663366336e-05, | |
| "loss": 0.8169, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 2.1261205564142194, | |
| "grad_norm": 0.0577755868434906, | |
| "learning_rate": 8.712871287128713e-05, | |
| "loss": 0.8586, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 2.138485316846986, | |
| "grad_norm": 0.06451012194156647, | |
| "learning_rate": 8.589108910891088e-05, | |
| "loss": 0.834, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 2.1508500772797525, | |
| "grad_norm": 0.06303463876247406, | |
| "learning_rate": 8.465346534653464e-05, | |
| "loss": 0.8333, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 2.1632148377125193, | |
| "grad_norm": 0.058561887592077255, | |
| "learning_rate": 8.341584158415841e-05, | |
| "loss": 0.8321, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 2.175579598145286, | |
| "grad_norm": 0.05364146828651428, | |
| "learning_rate": 8.217821782178216e-05, | |
| "loss": 0.8428, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 2.1879443585780525, | |
| "grad_norm": 0.063669353723526, | |
| "learning_rate": 8.094059405940594e-05, | |
| "loss": 0.8527, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 2.2003091190108193, | |
| "grad_norm": 0.05790480971336365, | |
| "learning_rate": 7.97029702970297e-05, | |
| "loss": 0.8261, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 2.2126738794435856, | |
| "grad_norm": 0.06101266294717789, | |
| "learning_rate": 7.846534653465345e-05, | |
| "loss": 0.8075, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 2.2250386398763524, | |
| "grad_norm": 0.06296826899051666, | |
| "learning_rate": 7.722772277227722e-05, | |
| "loss": 0.8284, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 2.237403400309119, | |
| "grad_norm": 0.0548894926905632, | |
| "learning_rate": 7.599009900990098e-05, | |
| "loss": 0.8385, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 2.2497681607418856, | |
| "grad_norm": 0.06245751306414604, | |
| "learning_rate": 7.475247524752474e-05, | |
| "loss": 0.8188, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 2.2621329211746524, | |
| "grad_norm": 0.06896353513002396, | |
| "learning_rate": 7.35148514851485e-05, | |
| "loss": 0.8229, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 2.2744976816074187, | |
| "grad_norm": 0.06569264829158783, | |
| "learning_rate": 7.227722772277227e-05, | |
| "loss": 0.8398, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 2.2868624420401855, | |
| "grad_norm": 0.06732139736413956, | |
| "learning_rate": 7.103960396039604e-05, | |
| "loss": 0.8439, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 2.2992272024729523, | |
| "grad_norm": 0.06835715472698212, | |
| "learning_rate": 6.98019801980198e-05, | |
| "loss": 0.821, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 2.3115919629057187, | |
| "grad_norm": 0.05850212648510933, | |
| "learning_rate": 6.856435643564355e-05, | |
| "loss": 0.8235, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 2.3239567233384855, | |
| "grad_norm": 0.06048553064465523, | |
| "learning_rate": 6.732673267326732e-05, | |
| "loss": 0.844, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 2.336321483771252, | |
| "grad_norm": 0.05443299934267998, | |
| "learning_rate": 6.608910891089109e-05, | |
| "loss": 0.8173, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 2.3486862442040186, | |
| "grad_norm": 0.06576599180698395, | |
| "learning_rate": 6.485148514851485e-05, | |
| "loss": 0.826, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 2.361051004636785, | |
| "grad_norm": 0.06261160224676132, | |
| "learning_rate": 6.361386138613861e-05, | |
| "loss": 0.8571, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 2.3734157650695518, | |
| "grad_norm": 0.05812652036547661, | |
| "learning_rate": 6.237623762376237e-05, | |
| "loss": 0.8227, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 2.3857805255023186, | |
| "grad_norm": 0.06309802830219269, | |
| "learning_rate": 6.113861386138613e-05, | |
| "loss": 0.8412, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 2.398145285935085, | |
| "grad_norm": 0.06207476556301117, | |
| "learning_rate": 5.99009900990099e-05, | |
| "loss": 0.8386, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 2.4105100463678517, | |
| "grad_norm": 0.05841566249728203, | |
| "learning_rate": 5.866336633663366e-05, | |
| "loss": 0.828, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 2.422874806800618, | |
| "grad_norm": 0.05857423320412636, | |
| "learning_rate": 5.742574257425742e-05, | |
| "loss": 0.8198, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 2.435239567233385, | |
| "grad_norm": 0.06476933509111404, | |
| "learning_rate": 5.618811881188118e-05, | |
| "loss": 0.84, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 2.4476043276661517, | |
| "grad_norm": 0.06856492906808853, | |
| "learning_rate": 5.4950495049504944e-05, | |
| "loss": 0.8386, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 2.459969088098918, | |
| "grad_norm": 0.0675152987241745, | |
| "learning_rate": 5.371287128712871e-05, | |
| "loss": 0.8603, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 2.472333848531685, | |
| "grad_norm": 0.059057943522930145, | |
| "learning_rate": 5.247524752475247e-05, | |
| "loss": 0.8254, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 2.484698608964451, | |
| "grad_norm": 0.06778612732887268, | |
| "learning_rate": 5.1237623762376234e-05, | |
| "loss": 0.829, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 2.497063369397218, | |
| "grad_norm": 0.0652635246515274, | |
| "learning_rate": 4.9999999999999996e-05, | |
| "loss": 0.8321, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 2.5094281298299848, | |
| "grad_norm": 0.0605316124856472, | |
| "learning_rate": 4.876237623762376e-05, | |
| "loss": 0.8458, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 2.521792890262751, | |
| "grad_norm": 0.06351178884506226, | |
| "learning_rate": 4.752475247524752e-05, | |
| "loss": 0.8199, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 2.534157650695518, | |
| "grad_norm": 0.0644257590174675, | |
| "learning_rate": 4.6287128712871286e-05, | |
| "loss": 0.8311, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 2.5465224111282843, | |
| "grad_norm": 0.06502491235733032, | |
| "learning_rate": 4.504950495049505e-05, | |
| "loss": 0.8443, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 2.558887171561051, | |
| "grad_norm": 0.07183568179607391, | |
| "learning_rate": 4.38118811881188e-05, | |
| "loss": 0.825, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 2.5712519319938174, | |
| "grad_norm": 0.06714395433664322, | |
| "learning_rate": 4.257425742574257e-05, | |
| "loss": 0.8283, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 2.583616692426584, | |
| "grad_norm": 0.07098986953496933, | |
| "learning_rate": 4.133663366336633e-05, | |
| "loss": 0.8376, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 2.595981452859351, | |
| "grad_norm": 0.0671941488981247, | |
| "learning_rate": 4.00990099009901e-05, | |
| "loss": 0.8457, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 2.6083462132921174, | |
| "grad_norm": 0.07306034862995148, | |
| "learning_rate": 3.886138613861386e-05, | |
| "loss": 0.843, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 2.620710973724884, | |
| "grad_norm": 0.06762495636940002, | |
| "learning_rate": 3.7623762376237615e-05, | |
| "loss": 0.8188, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 2.633075734157651, | |
| "grad_norm": 0.06061069294810295, | |
| "learning_rate": 3.638613861386138e-05, | |
| "loss": 0.8059, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 2.6454404945904173, | |
| "grad_norm": 0.0667000338435173, | |
| "learning_rate": 3.5148514851485144e-05, | |
| "loss": 0.8364, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 2.6578052550231837, | |
| "grad_norm": 0.058926161378622055, | |
| "learning_rate": 3.3910891089108906e-05, | |
| "loss": 0.8267, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 2.6701700154559505, | |
| "grad_norm": 0.05975179746747017, | |
| "learning_rate": 3.267326732673267e-05, | |
| "loss": 0.8018, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 2.6825347758887172, | |
| "grad_norm": 0.06300190091133118, | |
| "learning_rate": 3.1435643564356435e-05, | |
| "loss": 0.8306, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 2.6948995363214836, | |
| "grad_norm": 0.06579259783029556, | |
| "learning_rate": 3.0198019801980193e-05, | |
| "loss": 0.8385, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 2.7072642967542504, | |
| "grad_norm": 0.07062911242246628, | |
| "learning_rate": 2.8960396039603958e-05, | |
| "loss": 0.8422, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 2.719629057187017, | |
| "grad_norm": 0.06216396763920784, | |
| "learning_rate": 2.772277227722772e-05, | |
| "loss": 0.8292, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 2.7319938176197835, | |
| "grad_norm": 0.06445206701755524, | |
| "learning_rate": 2.6485148514851484e-05, | |
| "loss": 0.8406, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 2.7443585780525503, | |
| "grad_norm": 0.06448670476675034, | |
| "learning_rate": 2.5247524752475248e-05, | |
| "loss": 0.8171, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 2.7567233384853167, | |
| "grad_norm": 0.05858496576547623, | |
| "learning_rate": 2.4009900990099006e-05, | |
| "loss": 0.8383, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 2.7690880989180835, | |
| "grad_norm": 0.07208121567964554, | |
| "learning_rate": 2.277227722772277e-05, | |
| "loss": 0.8145, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 2.78145285935085, | |
| "grad_norm": 0.06663426011800766, | |
| "learning_rate": 2.1534653465346532e-05, | |
| "loss": 0.8293, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 2.7938176197836166, | |
| "grad_norm": 0.06585463881492615, | |
| "learning_rate": 2.0297029702970297e-05, | |
| "loss": 0.8303, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 2.8061823802163834, | |
| "grad_norm": 0.06423688679933548, | |
| "learning_rate": 1.9059405940594058e-05, | |
| "loss": 0.8172, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 2.81854714064915, | |
| "grad_norm": 0.06450697034597397, | |
| "learning_rate": 1.782178217821782e-05, | |
| "loss": 0.8355, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 2.8309119010819166, | |
| "grad_norm": 0.05580071732401848, | |
| "learning_rate": 1.6584158415841584e-05, | |
| "loss": 0.8136, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 2.8432766615146834, | |
| "grad_norm": 0.06626173853874207, | |
| "learning_rate": 1.5346534653465345e-05, | |
| "loss": 0.8238, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 2.8556414219474497, | |
| "grad_norm": 0.061952993273735046, | |
| "learning_rate": 1.4108910891089108e-05, | |
| "loss": 0.8179, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 2.8680061823802165, | |
| "grad_norm": 0.07288029789924622, | |
| "learning_rate": 1.287128712871287e-05, | |
| "loss": 0.8194, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 2.880370942812983, | |
| "grad_norm": 0.06706374138593674, | |
| "learning_rate": 1.1633663366336632e-05, | |
| "loss": 0.8611, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 2.8927357032457497, | |
| "grad_norm": 0.06370951235294342, | |
| "learning_rate": 1.0396039603960395e-05, | |
| "loss": 0.848, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 2.905100463678516, | |
| "grad_norm": 0.061200667172670364, | |
| "learning_rate": 9.158415841584158e-06, | |
| "loss": 0.817, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 2.917465224111283, | |
| "grad_norm": 0.07799932360649109, | |
| "learning_rate": 7.92079207920792e-06, | |
| "loss": 0.8401, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 2.9298299845440496, | |
| "grad_norm": 0.0740487277507782, | |
| "learning_rate": 6.683168316831683e-06, | |
| "loss": 0.8447, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 2.942194744976816, | |
| "grad_norm": 0.062499478459358215, | |
| "learning_rate": 5.445544554455446e-06, | |
| "loss": 0.8446, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 2.954559505409583, | |
| "grad_norm": 0.06899666786193848, | |
| "learning_rate": 4.207920792079208e-06, | |
| "loss": 0.8411, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 2.966924265842349, | |
| "grad_norm": 0.0634492039680481, | |
| "learning_rate": 2.97029702970297e-06, | |
| "loss": 0.8422, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 2.979289026275116, | |
| "grad_norm": 0.0700407549738884, | |
| "learning_rate": 1.7326732673267324e-06, | |
| "loss": 0.8468, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 2.9916537867078823, | |
| "grad_norm": 0.061774324625730515, | |
| "learning_rate": 4.95049504950495e-07, | |
| "loss": 0.8337, | |
| "step": 2420 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 2424, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 2.723181741683245e+18, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |