{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.0,
  "eval_steps": 500,
  "global_step": 176,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.011363636363636364,
      "grad_norm": 0.04888428873089485,
      "learning_rate": 2.777777777777778e-06,
      "loss": 0.9533,
      "step": 1
    },
    {
      "epoch": 0.022727272727272728,
      "grad_norm": 0.04118674609403888,
      "learning_rate": 5.555555555555556e-06,
      "loss": 0.8187,
      "step": 2
    },
    {
      "epoch": 0.03409090909090909,
      "grad_norm": 0.036867673437168666,
      "learning_rate": 8.333333333333334e-06,
      "loss": 0.7078,
      "step": 3
    },
    {
      "epoch": 0.045454545454545456,
      "grad_norm": 0.04847692066919062,
      "learning_rate": 1.1111111111111112e-05,
      "loss": 0.987,
      "step": 4
    },
    {
      "epoch": 0.056818181818181816,
      "grad_norm": 0.05003862080856868,
      "learning_rate": 1.388888888888889e-05,
      "loss": 0.9577,
      "step": 5
    },
    {
      "epoch": 0.06818181818181818,
      "grad_norm": 0.04493399553279938,
      "learning_rate": 1.6666666666666667e-05,
      "loss": 0.9205,
      "step": 6
    },
    {
      "epoch": 0.07954545454545454,
      "grad_norm": 0.044174232851205655,
      "learning_rate": 1.9444444444444445e-05,
      "loss": 0.7971,
      "step": 7
    },
    {
      "epoch": 0.09090909090909091,
      "grad_norm": 0.0537270325545959,
      "learning_rate": 2.2222222222222223e-05,
      "loss": 0.875,
      "step": 8
    },
    {
      "epoch": 0.10227272727272728,
      "grad_norm": 0.047274651714941,
      "learning_rate": 2.5e-05,
      "loss": 0.7865,
      "step": 9
    },
    {
      "epoch": 0.11363636363636363,
      "grad_norm": 0.047438139846740904,
      "learning_rate": 2.777777777777778e-05,
      "loss": 0.7949,
      "step": 10
    },
    {
      "epoch": 0.125,
      "grad_norm": 0.0486064821581454,
      "learning_rate": 3.055555555555556e-05,
      "loss": 0.9187,
      "step": 11
    },
    {
      "epoch": 0.13636363636363635,
      "grad_norm": 0.049768338512046005,
      "learning_rate": 3.3333333333333335e-05,
      "loss": 0.927,
      "step": 12
    },
    {
      "epoch": 0.14772727272727273,
      "grad_norm": 0.050752786859426545,
      "learning_rate": 3.611111111111111e-05,
      "loss": 0.882,
      "step": 13
    },
    {
      "epoch": 0.1590909090909091,
      "grad_norm": 0.04238163918748248,
      "learning_rate": 3.888888888888889e-05,
      "loss": 0.8273,
      "step": 14
    },
    {
      "epoch": 0.17045454545454544,
      "grad_norm": 0.03389553366668896,
      "learning_rate": 4.166666666666667e-05,
      "loss": 0.801,
      "step": 15
    },
    {
      "epoch": 0.18181818181818182,
      "grad_norm": 0.03219935695711762,
      "learning_rate": 4.4444444444444447e-05,
      "loss": 0.8313,
      "step": 16
    },
    {
      "epoch": 0.19318181818181818,
      "grad_norm": 0.027821411081551388,
      "learning_rate": 4.722222222222222e-05,
      "loss": 0.6832,
      "step": 17
    },
    {
      "epoch": 0.20454545454545456,
      "grad_norm": 0.028125631584449494,
      "learning_rate": 5e-05,
      "loss": 0.724,
      "step": 18
    },
    {
      "epoch": 0.2159090909090909,
      "grad_norm": 0.03372930491801512,
      "learning_rate": 4.9995058244251644e-05,
      "loss": 0.765,
      "step": 19
    },
    {
      "epoch": 0.22727272727272727,
      "grad_norm": 0.03207593160425088,
      "learning_rate": 4.9980234930682546e-05,
      "loss": 0.8319,
      "step": 20
    },
    {
      "epoch": 0.23863636363636365,
      "grad_norm": 0.04392253595539216,
      "learning_rate": 4.995553591954832e-05,
      "loss": 0.8572,
      "step": 21
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.043837015398686424,
      "learning_rate": 4.99209709753674e-05,
      "loss": 0.7994,
      "step": 22
    },
    {
      "epoch": 0.26136363636363635,
      "grad_norm": 0.038660838245880964,
      "learning_rate": 4.9876553763060684e-05,
      "loss": 0.7595,
      "step": 23
    },
    {
      "epoch": 0.2727272727272727,
      "grad_norm": 0.03958181928823714,
      "learning_rate": 4.982230184254933e-05,
      "loss": 0.7524,
      "step": 24
    },
    {
      "epoch": 0.2840909090909091,
      "grad_norm": 0.04692096023674341,
      "learning_rate": 4.975823666181255e-05,
      "loss": 0.7565,
      "step": 25
    },
    {
      "epoch": 0.29545454545454547,
      "grad_norm": 0.039034963022689224,
      "learning_rate": 4.968438354840834e-05,
      "loss": 0.8968,
      "step": 26
    },
    {
      "epoch": 0.3068181818181818,
      "grad_norm": 0.03472677387974209,
      "learning_rate": 4.960077169946052e-05,
      "loss": 0.7522,
      "step": 27
    },
    {
      "epoch": 0.3181818181818182,
      "grad_norm": 0.031244118470989204,
      "learning_rate": 4.950743417011591e-05,
      "loss": 0.7937,
      "step": 28
    },
    {
      "epoch": 0.32954545454545453,
      "grad_norm": 0.029092500515729473,
      "learning_rate": 4.940440786047628e-05,
      "loss": 0.667,
      "step": 29
    },
    {
      "epoch": 0.3409090909090909,
      "grad_norm": 0.028474671621308404,
      "learning_rate": 4.929173350101025e-05,
      "loss": 0.7255,
      "step": 30
    },
    {
      "epoch": 0.3522727272727273,
      "grad_norm": 0.021514453472108053,
      "learning_rate": 4.9169455636450935e-05,
      "loss": 0.7683,
      "step": 31
    },
    {
      "epoch": 0.36363636363636365,
      "grad_norm": 0.01979063211912252,
      "learning_rate": 4.903762260818551e-05,
      "loss": 0.7236,
      "step": 32
    },
    {
      "epoch": 0.375,
      "grad_norm": 0.019373274784309575,
      "learning_rate": 4.889628653514402e-05,
      "loss": 0.6894,
      "step": 33
    },
    {
      "epoch": 0.38636363636363635,
      "grad_norm": 0.01807357066294673,
      "learning_rate": 4.874550329319457e-05,
      "loss": 0.7558,
      "step": 34
    },
    {
      "epoch": 0.3977272727272727,
      "grad_norm": 0.01770418827474799,
      "learning_rate": 4.8585332493053364e-05,
      "loss": 0.6813,
      "step": 35
    },
    {
      "epoch": 0.4090909090909091,
      "grad_norm": 0.01675836399147525,
      "learning_rate": 4.84158374567182e-05,
      "loss": 0.7559,
      "step": 36
    },
    {
      "epoch": 0.42045454545454547,
      "grad_norm": 0.021175097986321438,
      "learning_rate": 4.8237085192434676e-05,
      "loss": 0.8176,
      "step": 37
    },
    {
      "epoch": 0.4318181818181818,
      "grad_norm": 0.017100608642967,
      "learning_rate": 4.804914636820517e-05,
      "loss": 0.7112,
      "step": 38
    },
    {
      "epoch": 0.4431818181818182,
      "grad_norm": 0.01683117973292665,
      "learning_rate": 4.7852095283850866e-05,
      "loss": 0.7345,
      "step": 39
    },
    {
      "epoch": 0.45454545454545453,
      "grad_norm": 0.019570606734413423,
      "learning_rate": 4.764600984163808e-05,
      "loss": 0.8125,
      "step": 40
    },
    {
      "epoch": 0.4659090909090909,
      "grad_norm": 0.015675238814520476,
      "learning_rate": 4.743097151548031e-05,
      "loss": 0.6994,
      "step": 41
    },
    {
      "epoch": 0.4772727272727273,
      "grad_norm": 0.019792988850059114,
      "learning_rate": 4.72070653187283e-05,
      "loss": 0.7837,
      "step": 42
    },
    {
      "epoch": 0.48863636363636365,
      "grad_norm": 0.015785852432543244,
      "learning_rate": 4.6974379770560846e-05,
      "loss": 0.6608,
      "step": 43
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.015533678745760247,
      "learning_rate": 4.673300686098957e-05,
      "loss": 0.7031,
      "step": 44
    },
    {
      "epoch": 0.5113636363636364,
      "grad_norm": 0.01921291960352695,
      "learning_rate": 4.648304201449153e-05,
      "loss": 0.8267,
      "step": 45
    },
    {
      "epoch": 0.5227272727272727,
      "grad_norm": 0.01614254754588724,
      "learning_rate": 4.6224584052284106e-05,
      "loss": 0.7058,
      "step": 46
    },
    {
      "epoch": 0.5340909090909091,
      "grad_norm": 0.019677063872610357,
      "learning_rate": 4.5957735153256915e-05,
      "loss": 0.6621,
      "step": 47
    },
    {
      "epoch": 0.5454545454545454,
      "grad_norm": 0.013519303189431392,
      "learning_rate": 4.5682600813576435e-05,
      "loss": 0.7478,
      "step": 48
    },
    {
      "epoch": 0.5568181818181818,
      "grad_norm": 0.013215293773761478,
      "learning_rate": 4.539928980497903e-05,
      "loss": 0.7034,
      "step": 49
    },
    {
      "epoch": 0.5681818181818182,
      "grad_norm": 0.013694552146445563,
      "learning_rate": 4.510791413176912e-05,
      "loss": 0.7448,
      "step": 50
    },
    {
      "epoch": 0.5795454545454546,
      "grad_norm": 0.016162877179262552,
      "learning_rate": 4.480858898653936e-05,
      "loss": 0.6728,
      "step": 51
    },
    {
      "epoch": 0.5909090909090909,
      "grad_norm": 0.017056245721837348,
      "learning_rate": 4.4501432704630305e-05,
      "loss": 0.6714,
      "step": 52
    },
    {
      "epoch": 0.6022727272727273,
      "grad_norm": 0.01602874080265432,
      "learning_rate": 4.41865667173477e-05,
      "loss": 0.7667,
      "step": 53
    },
    {
      "epoch": 0.6136363636363636,
      "grad_norm": 0.01556595840862479,
      "learning_rate": 4.386411550395576e-05,
      "loss": 0.6934,
      "step": 54
    },
    {
      "epoch": 0.625,
      "grad_norm": 0.014885950964239066,
      "learning_rate": 4.353420654246546e-05,
      "loss": 0.6909,
      "step": 55
    },
    {
      "epoch": 0.6363636363636364,
      "grad_norm": 0.01399086114921709,
      "learning_rate": 4.319697025923736e-05,
      "loss": 0.6817,
      "step": 56
    },
    {
      "epoch": 0.6477272727272727,
      "grad_norm": 0.016980372482516305,
      "learning_rate": 4.285253997741875e-05,
      "loss": 0.7485,
      "step": 57
    },
    {
      "epoch": 0.6590909090909091,
      "grad_norm": 0.01370669813419946,
      "learning_rate": 4.2501051864235636e-05,
      "loss": 0.5524,
      "step": 58
    },
    {
      "epoch": 0.6704545454545454,
      "grad_norm": 0.0161962614922305,
      "learning_rate": 4.214264487716033e-05,
      "loss": 0.7145,
      "step": 59
    },
    {
      "epoch": 0.6818181818181818,
      "grad_norm": 0.01432616251653496,
      "learning_rate": 4.177746070897592e-05,
      "loss": 0.7263,
      "step": 60
    },
    {
      "epoch": 0.6931818181818182,
      "grad_norm": 0.01515485647301396,
      "learning_rate": 4.140564373175939e-05,
      "loss": 0.6531,
      "step": 61
    },
    {
      "epoch": 0.7045454545454546,
      "grad_norm": 0.014400704458474281,
      "learning_rate": 4.10273409398055e-05,
      "loss": 0.7696,
      "step": 62
    },
    {
      "epoch": 0.7159090909090909,
      "grad_norm": 0.015458672360864132,
      "learning_rate": 4.0642701891514e-05,
      "loss": 0.7081,
      "step": 63
    },
    {
      "epoch": 0.7272727272727273,
      "grad_norm": 0.015263330526513203,
      "learning_rate": 4.025187865026311e-05,
      "loss": 0.725,
      "step": 64
    },
    {
      "epoch": 0.7386363636363636,
      "grad_norm": 0.015125337784791405,
      "learning_rate": 3.985502572429276e-05,
      "loss": 0.7111,
      "step": 65
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.013985325014602094,
      "learning_rate": 3.945230000562121e-05,
      "loss": 0.7249,
      "step": 66
    },
    {
      "epoch": 0.7613636363636364,
      "grad_norm": 0.0155589237978013,
      "learning_rate": 3.9043860708019273e-05,
      "loss": 0.6881,
      "step": 67
    },
    {
      "epoch": 0.7727272727272727,
      "grad_norm": 0.01882918883166355,
      "learning_rate": 3.862986930406669e-05,
      "loss": 0.8102,
      "step": 68
    },
    {
      "epoch": 0.7840909090909091,
      "grad_norm": 0.015308078133970605,
      "learning_rate": 3.821048946131549e-05,
      "loss": 0.6919,
      "step": 69
    },
    {
      "epoch": 0.7954545454545454,
      "grad_norm": 0.01630125314346408,
      "learning_rate": 3.778588697758556e-05,
      "loss": 0.6464,
      "step": 70
    },
    {
      "epoch": 0.8068181818181818,
      "grad_norm": 0.015050157672318546,
      "learning_rate": 3.7356229715418074e-05,
      "loss": 0.7114,
      "step": 71
    },
    {
      "epoch": 0.8181818181818182,
      "grad_norm": 0.015186905856860966,
      "learning_rate": 3.6921687535712656e-05,
      "loss": 0.719,
      "step": 72
    },
    {
      "epoch": 0.8295454545454546,
      "grad_norm": 0.01935797435359767,
      "learning_rate": 3.6482432230574446e-05,
      "loss": 0.7255,
      "step": 73
    },
    {
      "epoch": 0.8409090909090909,
      "grad_norm": 0.016661093988049422,
      "learning_rate": 3.60386374553978e-05,
      "loss": 0.6642,
      "step": 74
    },
    {
      "epoch": 0.8522727272727273,
      "grad_norm": 0.017167771916012636,
      "learning_rate": 3.5590478660213214e-05,
      "loss": 0.677,
      "step": 75
    },
    {
      "epoch": 0.8636363636363636,
      "grad_norm": 0.015022481447368145,
      "learning_rate": 3.5138133020324845e-05,
      "loss": 0.7307,
      "step": 76
    },
    {
      "epoch": 0.875,
      "grad_norm": 0.01785502395194187,
      "learning_rate": 3.468177936626603e-05,
      "loss": 0.789,
      "step": 77
    },
    {
      "epoch": 0.8863636363636364,
      "grad_norm": 0.01864810858161923,
      "learning_rate": 3.4221598113100195e-05,
      "loss": 0.7059,
      "step": 78
    },
    {
      "epoch": 0.8977272727272727,
      "grad_norm": 0.016575657529251383,
      "learning_rate": 3.375777118909561e-05,
      "loss": 0.7456,
      "step": 79
    },
    {
      "epoch": 0.9090909090909091,
      "grad_norm": 0.016188984904551812,
      "learning_rate": 3.32904819638017e-05,
      "loss": 0.683,
      "step": 80
    },
    {
      "epoch": 0.9204545454545454,
      "grad_norm": 0.016436588560373562,
      "learning_rate": 3.2819915175555684e-05,
      "loss": 0.6475,
      "step": 81
    },
    {
      "epoch": 0.9318181818181818,
      "grad_norm": 0.017564054129248746,
      "learning_rate": 3.234625685844803e-05,
      "loss": 0.622,
      "step": 82
    },
    {
      "epoch": 0.9431818181818182,
      "grad_norm": 0.019239130214828547,
      "learning_rate": 3.186969426877563e-05,
      "loss": 0.638,
      "step": 83
    },
    {
      "epoch": 0.9545454545454546,
      "grad_norm": 0.01778558528784305,
      "learning_rate": 3.139041581101187e-05,
      "loss": 0.5941,
      "step": 84
    },
    {
      "epoch": 0.9659090909090909,
      "grad_norm": 0.03326719961885704,
      "learning_rate": 3.090861096332263e-05,
      "loss": 0.6721,
      "step": 85
    },
    {
      "epoch": 0.9772727272727273,
      "grad_norm": 0.017697778364759945,
      "learning_rate": 3.042447020265795e-05,
      "loss": 0.663,
      "step": 86
    },
    {
      "epoch": 0.9886363636363636,
      "grad_norm": 0.015032572277755535,
      "learning_rate": 2.993818492944882e-05,
      "loss": 0.6462,
      "step": 87
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.01697573905444168,
      "learning_rate": 2.9449947391938766e-05,
      "loss": 0.6957,
      "step": 88
    },
    {
      "epoch": 1.0113636363636365,
      "grad_norm": 0.01629821245832795,
      "learning_rate": 2.8959950610180374e-05,
      "loss": 0.6629,
      "step": 89
    },
    {
      "epoch": 1.0227272727272727,
      "grad_norm": 0.015172985524910379,
      "learning_rate": 2.8468388299726712e-05,
      "loss": 0.6602,
      "step": 90
    },
    {
      "epoch": 1.0340909090909092,
      "grad_norm": 0.015577909780648352,
      "learning_rate": 2.7975454795047622e-05,
      "loss": 0.796,
      "step": 91
    },
    {
      "epoch": 1.0454545454545454,
      "grad_norm": 0.017309986409190056,
      "learning_rate": 2.7481344972701545e-05,
      "loss": 0.6618,
      "step": 92
    },
    {
      "epoch": 1.0568181818181819,
      "grad_norm": 0.01661621611638882,
      "learning_rate": 2.6986254174292862e-05,
      "loss": 0.7065,
      "step": 93
    },
    {
      "epoch": 1.0681818181818181,
      "grad_norm": 0.016737287474769654,
      "learning_rate": 2.6490378129245498e-05,
      "loss": 0.6457,
      "step": 94
    },
    {
      "epoch": 1.0795454545454546,
      "grad_norm": 0.016921442525318752,
      "learning_rate": 2.599391287742315e-05,
      "loss": 0.7422,
      "step": 95
    },
    {
      "epoch": 1.0909090909090908,
      "grad_norm": 0.01644141496345415,
      "learning_rate": 2.5497054691626753e-05,
      "loss": 0.7125,
      "step": 96
    },
    {
      "epoch": 1.1022727272727273,
      "grad_norm": 0.01566715623525306,
      "learning_rate": 2.5e-05,
      "loss": 0.7015,
      "step": 97
    },
    {
      "epoch": 1.1136363636363635,
      "grad_norm": 0.016042962886209804,
      "learning_rate": 2.4502945308373246e-05,
      "loss": 0.6911,
      "step": 98
    },
    {
      "epoch": 1.125,
      "grad_norm": 0.014451142623462782,
      "learning_rate": 2.4006087122576863e-05,
      "loss": 0.6499,
      "step": 99
    },
    {
      "epoch": 1.1363636363636362,
      "grad_norm": 0.016365537745248963,
      "learning_rate": 2.3509621870754505e-05,
      "loss": 0.6564,
      "step": 100
    },
    {
      "epoch": 1.1477272727272727,
      "grad_norm": 0.015223080653847816,
      "learning_rate": 2.301374582570714e-05,
      "loss": 0.6767,
      "step": 101
    },
    {
      "epoch": 1.1590909090909092,
      "grad_norm": 0.017013122218294784,
      "learning_rate": 2.2518655027298464e-05,
      "loss": 0.6712,
      "step": 102
    },
    {
      "epoch": 1.1704545454545454,
      "grad_norm": 0.017431829042961085,
      "learning_rate": 2.2024545204952383e-05,
      "loss": 0.6363,
      "step": 103
    },
    {
      "epoch": 1.1818181818181819,
      "grad_norm": 0.01638659813387577,
      "learning_rate": 2.1531611700273297e-05,
      "loss": 0.7341,
      "step": 104
    },
    {
      "epoch": 1.1931818181818181,
      "grad_norm": 0.015883920910595706,
      "learning_rate": 2.104004938981963e-05,
      "loss": 0.7503,
      "step": 105
    },
    {
      "epoch": 1.2045454545454546,
      "grad_norm": 0.016308759795444855,
      "learning_rate": 2.055005260806125e-05,
      "loss": 0.7494,
      "step": 106
    },
    {
      "epoch": 1.2159090909090908,
      "grad_norm": 0.01653825796554113,
      "learning_rate": 2.0061815070551186e-05,
      "loss": 0.584,
      "step": 107
    },
    {
      "epoch": 1.2272727272727273,
      "grad_norm": 0.015995947052341933,
      "learning_rate": 1.957552979734205e-05,
      "loss": 0.7943,
      "step": 108
    },
    {
      "epoch": 1.2386363636363638,
      "grad_norm": 0.018158719978620477,
      "learning_rate": 1.9091389036677382e-05,
      "loss": 0.7001,
      "step": 109
    },
    {
      "epoch": 1.25,
      "grad_norm": 0.015286756025554763,
      "learning_rate": 1.8609584188988136e-05,
      "loss": 0.6132,
      "step": 110
    },
    {
      "epoch": 1.2613636363636362,
      "grad_norm": 0.014766283231039113,
      "learning_rate": 1.813030573122437e-05,
      "loss": 0.6789,
      "step": 111
    },
    {
      "epoch": 1.2727272727272727,
      "grad_norm": 0.018163850190524557,
      "learning_rate": 1.7653743141551983e-05,
      "loss": 0.6641,
      "step": 112
    },
    {
      "epoch": 1.2840909090909092,
      "grad_norm": 0.015499094565043982,
      "learning_rate": 1.7180084824444325e-05,
      "loss": 0.7034,
      "step": 113
    },
    {
      "epoch": 1.2954545454545454,
      "grad_norm": 0.01666315009242263,
      "learning_rate": 1.6709518036198308e-05,
      "loss": 0.6644,
      "step": 114
    },
    {
      "epoch": 1.3068181818181819,
      "grad_norm": 0.018550049211631166,
      "learning_rate": 1.6242228810904392e-05,
      "loss": 0.6314,
      "step": 115
    },
    {
      "epoch": 1.3181818181818181,
      "grad_norm": 0.016414228273475284,
      "learning_rate": 1.5778401886899807e-05,
      "loss": 0.7085,
      "step": 116
    },
    {
      "epoch": 1.3295454545454546,
      "grad_norm": 0.016629484072509887,
      "learning_rate": 1.5318220633733978e-05,
      "loss": 0.6328,
      "step": 117
    },
    {
      "epoch": 1.3409090909090908,
      "grad_norm": 0.01735441033712776,
      "learning_rate": 1.4861866979675154e-05,
      "loss": 0.6987,
      "step": 118
    },
    {
      "epoch": 1.3522727272727273,
      "grad_norm": 0.01653379311481811,
      "learning_rate": 1.4409521339786808e-05,
      "loss": 0.7689,
      "step": 119
    },
    {
      "epoch": 1.3636363636363638,
      "grad_norm": 0.017109513908993915,
      "learning_rate": 1.3961362544602213e-05,
      "loss": 0.852,
      "step": 120
    },
    {
      "epoch": 1.375,
      "grad_norm": 0.015606168185551323,
      "learning_rate": 1.3517567769425548e-05,
      "loss": 0.6131,
      "step": 121
    },
    {
      "epoch": 1.3863636363636362,
      "grad_norm": 0.016954213866876906,
      "learning_rate": 1.3078312464287353e-05,
      "loss": 0.7262,
      "step": 122
    },
    {
      "epoch": 1.3977272727272727,
      "grad_norm": 0.019722191608232728,
      "learning_rate": 1.2643770284581929e-05,
      "loss": 0.6411,
      "step": 123
    },
    {
      "epoch": 1.4090909090909092,
      "grad_norm": 0.016754592315893345,
      "learning_rate": 1.2214113022414448e-05,
      "loss": 0.7406,
      "step": 124
    },
    {
      "epoch": 1.4204545454545454,
      "grad_norm": 0.01811366422363491,
      "learning_rate": 1.1789510538684523e-05,
      "loss": 0.6915,
      "step": 125
    },
    {
      "epoch": 1.4318181818181819,
      "grad_norm": 0.015117191146405742,
      "learning_rate": 1.1370130695933318e-05,
      "loss": 0.7914,
      "step": 126
    },
    {
      "epoch": 1.4431818181818181,
      "grad_norm": 0.019162871723577116,
      "learning_rate": 1.0956139291980727e-05,
      "loss": 0.7924,
      "step": 127
    },
    {
      "epoch": 1.4545454545454546,
      "grad_norm": 0.017257641671438298,
      "learning_rate": 1.0547699994378787e-05,
      "loss": 0.6671,
      "step": 128
    },
    {
      "epoch": 1.4659090909090908,
      "grad_norm": 0.018952048140386035,
      "learning_rate": 1.0144974275707241e-05,
      "loss": 0.7046,
      "step": 129
    },
    {
      "epoch": 1.4772727272727273,
      "grad_norm": 0.02016982804986924,
      "learning_rate": 9.748121349736892e-06,
      "loss": 0.7542,
      "step": 130
    },
    {
      "epoch": 1.4886363636363638,
      "grad_norm": 0.015861465051683968,
      "learning_rate": 9.357298108486003e-06,
      "loss": 0.6563,
      "step": 131
    },
    {
      "epoch": 1.5,
      "grad_norm": 0.016677424892624696,
      "learning_rate": 8.972659060194506e-06,
      "loss": 0.6743,
      "step": 132
    },
    {
      "epoch": 1.5113636363636362,
      "grad_norm": 0.017220590202846452,
      "learning_rate": 8.594356268240616e-06,
      "loss": 0.5913,
      "step": 133
    },
    {
      "epoch": 1.5227272727272727,
      "grad_norm": 0.017476835977041916,
      "learning_rate": 8.222539291024078e-06,
      "loss": 0.6184,
      "step": 134
    },
    {
      "epoch": 1.5340909090909092,
      "grad_norm": 0.01852744989540818,
      "learning_rate": 7.857355122839675e-06,
      "loss": 0.8003,
      "step": 135
    },
    {
      "epoch": 1.5454545454545454,
      "grad_norm": 0.017724119693348375,
      "learning_rate": 7.4989481357643694e-06,
      "loss": 0.7319,
      "step": 136
    },
    {
      "epoch": 1.5568181818181817,
      "grad_norm": 0.01827897532047109,
      "learning_rate": 7.147460022581257e-06,
      "loss": 0.7496,
      "step": 137
    },
    {
      "epoch": 1.5681818181818183,
      "grad_norm": 0.017356487062778,
      "learning_rate": 6.803029740762648e-06,
      "loss": 0.6998,
      "step": 138
    },
    {
      "epoch": 1.5795454545454546,
      "grad_norm": 0.01850857557468877,
      "learning_rate": 6.465793457534553e-06,
      "loss": 0.6103,
      "step": 139
    },
    {
      "epoch": 1.5909090909090908,
      "grad_norm": 0.018299658199403747,
      "learning_rate": 6.135884496044244e-06,
      "loss": 0.6448,
      "step": 140
    },
    {
      "epoch": 1.6022727272727273,
      "grad_norm": 0.018388053763702464,
      "learning_rate": 5.813433282652298e-06,
      "loss": 0.6399,
      "step": 141
    },
    {
      "epoch": 1.6136363636363638,
      "grad_norm": 0.02080237717937431,
      "learning_rate": 5.4985672953697e-06,
      "loss": 0.6313,
      "step": 142
    },
    {
      "epoch": 1.625,
      "grad_norm": 0.01796260664782129,
      "learning_rate": 5.191411013460645e-06,
      "loss": 0.6843,
      "step": 143
    },
    {
      "epoch": 1.6363636363636362,
      "grad_norm": 0.02056859413635753,
      "learning_rate": 4.892085868230881e-06,
      "loss": 0.7157,
      "step": 144
    },
    {
      "epoch": 1.6477272727272727,
      "grad_norm": 0.016274442923538548,
      "learning_rate": 4.600710195020982e-06,
      "loss": 0.7276,
      "step": 145
    },
    {
      "epoch": 1.6590909090909092,
      "grad_norm": 0.01815637927898507,
      "learning_rate": 4.317399186423574e-06,
      "loss": 0.6305,
      "step": 146
    },
    {
      "epoch": 1.6704545454545454,
      "grad_norm": 0.01897651128017473,
      "learning_rate": 4.042264846743085e-06,
      "loss": 0.675,
      "step": 147
    },
    {
      "epoch": 1.6818181818181817,
      "grad_norm": 0.018136754953765325,
      "learning_rate": 3.775415947715899e-06,
      "loss": 0.7152,
      "step": 148
    },
    {
      "epoch": 1.6931818181818183,
      "grad_norm": 0.018312167884684892,
      "learning_rate": 3.516957985508476e-06,
      "loss": 0.6128,
      "step": 149
    },
    {
      "epoch": 1.7045454545454546,
      "grad_norm": 0.01872198284953679,
      "learning_rate": 3.266993139010438e-06,
      "loss": 0.7636,
      "step": 150
    },
    {
      "epoch": 1.7159090909090908,
      "grad_norm": 0.019247219050936033,
      "learning_rate": 3.0256202294391577e-06,
      "loss": 0.7763,
      "step": 151
    },
    {
      "epoch": 1.7272727272727273,
      "grad_norm": 0.01747541840805843,
      "learning_rate": 2.792934681271708e-06,
      "loss": 0.718,
      "step": 152
    },
    {
      "epoch": 1.7386363636363638,
      "grad_norm": 0.016543357188300384,
      "learning_rate": 2.5690284845196923e-06,
      "loss": 0.7503,
      "step": 153
    },
    {
      "epoch": 1.75,
      "grad_norm": 0.017777133602865168,
      "learning_rate": 2.3539901583619185e-06,
      "loss": 0.6796,
      "step": 154
    },
    {
      "epoch": 1.7613636363636362,
      "grad_norm": 0.017730228650558268,
      "learning_rate": 2.147904716149135e-06,
      "loss": 0.7887,
      "step": 155
    },
    {
      "epoch": 1.7727272727272727,
      "grad_norm": 0.016904387380041325,
      "learning_rate": 1.9508536317948357e-06,
      "loss": 0.7028,
      "step": 156
    },
    {
      "epoch": 1.7840909090909092,
      "grad_norm": 0.017048389518616498,
      "learning_rate": 1.7629148075653245e-06,
      "loss": 0.6987,
      "step": 157
    },
    {
      "epoch": 1.7954545454545454,
      "grad_norm": 0.019231086359691812,
      "learning_rate": 1.5841625432818057e-06,
      "loss": 0.7262,
      "step": 158
    },
    {
      "epoch": 1.8068181818181817,
      "grad_norm": 0.018406406662889105,
      "learning_rate": 1.4146675069466403e-06,
      "loss": 0.6232,
      "step": 159
    },
    {
      "epoch": 1.8181818181818183,
      "grad_norm": 0.01859904836356649,
      "learning_rate": 1.2544967068054332e-06,
      "loss": 0.7216,
      "step": 160
    },
    {
      "epoch": 1.8295454545454546,
      "grad_norm": 0.017748264732114775,
      "learning_rate": 1.1037134648559794e-06,
      "loss": 0.66,
      "step": 161
    },
    {
      "epoch": 1.8409090909090908,
      "grad_norm": 0.019172670841775385,
      "learning_rate": 9.623773918144897e-07,
      "loss": 0.6719,
      "step": 162
    },
    {
      "epoch": 1.8522727272727273,
      "grad_norm": 0.017201032284762996,
      "learning_rate": 8.305443635490711e-07,
      "loss": 0.6841,
      "step": 163
    },
    {
      "epoch": 1.8636363636363638,
      "grad_norm": 0.017602592399586683,
      "learning_rate": 7.082664989897487e-07,
      "loss": 0.759,
      "step": 164
    },
    {
      "epoch": 1.875,
      "grad_norm": 0.01737274402951971,
      "learning_rate": 5.955921395237318e-07,
      "loss": 0.6251,
      "step": 165
    },
    {
      "epoch": 1.8863636363636362,
      "grad_norm": 0.018381873767541995,
      "learning_rate": 4.925658298840979e-07,
      "loss": 0.7006,
      "step": 166
    },
    {
      "epoch": 1.8977272727272727,
      "grad_norm": 0.017002639935913035,
      "learning_rate": 3.992283005394837e-07,
      "loss": 0.5917,
      "step": 167
    },
    {
      "epoch": 1.9090909090909092,
      "grad_norm": 0.017463979654969486,
      "learning_rate": 3.1561645159166597e-07,
      "loss": 0.6757,
      "step": 168
    },
    {
      "epoch": 1.9204545454545454,
      "grad_norm": 0.018406237369712176,
      "learning_rate": 2.417633381874534e-07,
      "loss": 0.7824,
      "step": 169
    },
    {
      "epoch": 1.9318181818181817,
      "grad_norm": 0.017967868873337563,
      "learning_rate": 1.7769815745066475e-07,
      "loss": 0.7394,
      "step": 170
    },
    {
      "epoch": 1.9431818181818183,
      "grad_norm": 0.01623825702144127,
      "learning_rate": 1.234462369393147e-07,
      "loss": 0.7684,
      "step": 171
    },
    {
      "epoch": 1.9545454545454546,
      "grad_norm": 0.017608033119901965,
      "learning_rate": 7.90290246326042e-08,
      "loss": 0.7831,
      "step": 172
    },
    {
      "epoch": 1.9659090909090908,
      "grad_norm": 0.017319359934865306,
      "learning_rate": 4.4464080451675494e-08,
      "loss": 0.6974,
      "step": 173
    },
    {
      "epoch": 1.9772727272727273,
      "grad_norm": 0.017475580158119938,
      "learning_rate": 1.976506931745392e-08,
      "loss": 0.7574,
      "step": 174
    },
    {
      "epoch": 1.9886363636363638,
      "grad_norm": 0.017048411256850495,
      "learning_rate": 4.941755748361088e-09,
      "loss": 0.6594,
      "step": 175
    },
    {
      "epoch": 2.0,
      "grad_norm": 0.018638627651477082,
      "learning_rate": 0.0,
      "loss": 0.7388,
      "step": 176
    },
    {
      "epoch": 2.0,
      "step": 176,
      "total_flos": 3054747482324992.0,
      "train_loss": 0.7215218588032506,
      "train_runtime": 3666.1508,
      "train_samples_per_second": 1.53,
      "train_steps_per_second": 0.048
    }
  ],
  "logging_steps": 1,
  "max_steps": 176,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 3054747482324992.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}