{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.9626168224299065,
  "eval_steps": 500,
  "global_step": 105,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.028037383177570093,
      "grad_norm": 5.46907120535221,
      "learning_rate": 9.090909090909091e-07,
      "loss": 0.6948,
      "step": 1
    },
    {
      "epoch": 0.056074766355140186,
      "grad_norm": 5.695701169228273,
      "learning_rate": 1.8181818181818183e-06,
      "loss": 0.659,
      "step": 2
    },
    {
      "epoch": 0.08411214953271028,
      "grad_norm": 5.869868275090631,
      "learning_rate": 2.7272727272727272e-06,
      "loss": 0.6481,
      "step": 3
    },
    {
      "epoch": 0.11214953271028037,
      "grad_norm": 5.197705973950515,
      "learning_rate": 3.6363636363636366e-06,
      "loss": 0.6641,
      "step": 4
    },
    {
      "epoch": 0.14018691588785046,
      "grad_norm": 3.3963665075709906,
      "learning_rate": 4.5454545454545455e-06,
      "loss": 0.5579,
      "step": 5
    },
    {
      "epoch": 0.16822429906542055,
      "grad_norm": 2.20924738877731,
      "learning_rate": 5.4545454545454545e-06,
      "loss": 0.6036,
      "step": 6
    },
    {
      "epoch": 0.19626168224299065,
      "grad_norm": 1.984739518286341,
      "learning_rate": 6.363636363636364e-06,
      "loss": 0.5288,
      "step": 7
    },
    {
      "epoch": 0.22429906542056074,
      "grad_norm": 1.9291295994072917,
      "learning_rate": 7.272727272727273e-06,
      "loss": 0.5054,
      "step": 8
    },
    {
      "epoch": 0.2523364485981308,
      "grad_norm": 1.6389948204701459,
      "learning_rate": 8.181818181818183e-06,
      "loss": 0.5339,
      "step": 9
    },
    {
      "epoch": 0.2803738317757009,
      "grad_norm": 1.4300152832592459,
      "learning_rate": 9.090909090909091e-06,
      "loss": 0.5433,
      "step": 10
    },
    {
      "epoch": 0.308411214953271,
      "grad_norm": 1.711886946596263,
      "learning_rate": 1e-05,
      "loss": 0.5537,
      "step": 11
    },
    {
      "epoch": 0.3364485981308411,
      "grad_norm": 1.5442931844274987,
      "learning_rate": 9.997207818651273e-06,
      "loss": 0.498,
      "step": 12
    },
    {
      "epoch": 0.3644859813084112,
      "grad_norm": 1.3068883684134325,
      "learning_rate": 9.988834393115768e-06,
      "loss": 0.6412,
      "step": 13
    },
    {
      "epoch": 0.3925233644859813,
      "grad_norm": 1.2741183296138552,
      "learning_rate": 9.97488907544252e-06,
      "loss": 0.5796,
      "step": 14
    },
    {
      "epoch": 0.4205607476635514,
      "grad_norm": 1.0164680008182152,
      "learning_rate": 9.955387440773902e-06,
      "loss": 0.5019,
      "step": 15
    },
    {
      "epoch": 0.4485981308411215,
      "grad_norm": 1.036540358883728,
      "learning_rate": 9.930351269950144e-06,
      "loss": 0.5982,
      "step": 16
    },
    {
      "epoch": 0.4766355140186916,
      "grad_norm": 1.0480857466954097,
      "learning_rate": 9.899808525182935e-06,
      "loss": 0.5377,
      "step": 17
    },
    {
      "epoch": 0.5046728971962616,
      "grad_norm": 0.8148023572178399,
      "learning_rate": 9.863793318825186e-06,
      "loss": 0.4776,
      "step": 18
    },
    {
      "epoch": 0.5327102803738317,
      "grad_norm": 1.0017091343990974,
      "learning_rate": 9.822345875271884e-06,
      "loss": 0.648,
      "step": 19
    },
    {
      "epoch": 0.5607476635514018,
      "grad_norm": 0.7723478036715665,
      "learning_rate": 9.775512486034564e-06,
      "loss": 0.4544,
      "step": 20
    },
    {
      "epoch": 0.5887850467289719,
      "grad_norm": 0.8710833273679682,
      "learning_rate": 9.723345458039595e-06,
      "loss": 0.4936,
      "step": 21
    },
    {
      "epoch": 0.616822429906542,
      "grad_norm": 0.9055028217774582,
      "learning_rate": 9.665903055208013e-06,
      "loss": 0.5819,
      "step": 22
    },
    {
      "epoch": 0.6448598130841121,
      "grad_norm": 0.8103202901623571,
      "learning_rate": 9.603249433382145e-06,
      "loss": 0.5506,
      "step": 23
    },
    {
      "epoch": 0.6728971962616822,
      "grad_norm": 0.7712121760995281,
      "learning_rate": 9.535454568671705e-06,
      "loss": 0.4932,
      "step": 24
    },
    {
      "epoch": 0.7009345794392523,
      "grad_norm": 0.9091205022340342,
      "learning_rate": 9.462594179299408e-06,
      "loss": 0.5776,
      "step": 25
    },
    {
      "epoch": 0.7289719626168224,
      "grad_norm": 0.7512412627292558,
      "learning_rate": 9.384749641033358e-06,
      "loss": 0.4665,
      "step": 26
    },
    {
      "epoch": 0.7570093457943925,
      "grad_norm": 0.9759396921985853,
      "learning_rate": 9.302007896300697e-06,
      "loss": 0.6547,
      "step": 27
    },
    {
      "epoch": 0.7850467289719626,
      "grad_norm": 0.8218161831230707,
      "learning_rate": 9.214461357083986e-06,
      "loss": 0.5403,
      "step": 28
    },
    {
      "epoch": 0.8130841121495327,
      "grad_norm": 0.7802775736672756,
      "learning_rate": 9.122207801708802e-06,
      "loss": 0.4792,
      "step": 29
    },
    {
      "epoch": 0.8411214953271028,
      "grad_norm": 1.2211854499314727,
      "learning_rate": 9.025350265637816e-06,
      "loss": 0.5767,
      "step": 30
    },
    {
      "epoch": 0.8691588785046729,
      "grad_norm": 0.7291315847510311,
      "learning_rate": 8.923996926393306e-06,
      "loss": 0.5339,
      "step": 31
    },
    {
      "epoch": 0.897196261682243,
      "grad_norm": 0.7685026874314949,
      "learning_rate": 8.818260982736662e-06,
      "loss": 0.5102,
      "step": 32
    },
    {
      "epoch": 0.9252336448598131,
      "grad_norm": 0.7819112577141099,
      "learning_rate": 8.708260528239788e-06,
      "loss": 0.5196,
      "step": 33
    },
    {
      "epoch": 0.9532710280373832,
      "grad_norm": 0.805804762659976,
      "learning_rate": 8.594118419389648e-06,
      "loss": 0.5486,
      "step": 34
    },
    {
      "epoch": 0.9813084112149533,
      "grad_norm": 0.6960763295513189,
      "learning_rate": 8.475962138373212e-06,
      "loss": 0.4885,
      "step": 35
    },
    {
      "epoch": 1.0186915887850467,
      "grad_norm": 1.261779486223386,
      "learning_rate": 8.353923650696119e-06,
      "loss": 0.7283,
      "step": 36
    },
    {
      "epoch": 1.0467289719626167,
      "grad_norm": 2.1904861480940703,
      "learning_rate": 8.228139257794012e-06,
      "loss": 0.5858,
      "step": 37
    },
    {
      "epoch": 1.074766355140187,
      "grad_norm": 0.836565359732725,
      "learning_rate": 8.098749444801226e-06,
      "loss": 0.4984,
      "step": 38
    },
    {
      "epoch": 1.102803738317757,
      "grad_norm": 0.7156392071456283,
      "learning_rate": 7.965898723646777e-06,
      "loss": 0.4133,
      "step": 39
    },
    {
      "epoch": 1.1308411214953271,
      "grad_norm": 0.7811150734148334,
      "learning_rate": 7.829735471652978e-06,
      "loss": 0.4737,
      "step": 40
    },
    {
      "epoch": 1.158878504672897,
      "grad_norm": 0.8005430413673945,
      "learning_rate": 7.690411765816864e-06,
      "loss": 0.461,
      "step": 41
    },
    {
      "epoch": 1.1869158878504673,
      "grad_norm": 0.734823009686673,
      "learning_rate": 7.548083212959588e-06,
      "loss": 0.4878,
      "step": 42
    },
    {
      "epoch": 1.2149532710280373,
      "grad_norm": 0.9106888643783978,
      "learning_rate": 7.402908775933419e-06,
      "loss": 0.5398,
      "step": 43
    },
    {
      "epoch": 1.2429906542056075,
      "grad_norm": 0.6471585113802202,
      "learning_rate": 7.25505059608051e-06,
      "loss": 0.4282,
      "step": 44
    },
    {
      "epoch": 1.2710280373831775,
      "grad_norm": 0.7710047485521259,
      "learning_rate": 7.104673812141676e-06,
      "loss": 0.4352,
      "step": 45
    },
    {
      "epoch": 1.2990654205607477,
      "grad_norm": 0.6543038711142444,
      "learning_rate": 6.9519463758174745e-06,
      "loss": 0.3746,
      "step": 46
    },
    {
      "epoch": 1.3271028037383177,
      "grad_norm": 0.6480235385313834,
      "learning_rate": 6.797038864187564e-06,
      "loss": 0.4648,
      "step": 47
    },
    {
      "epoch": 1.355140186915888,
      "grad_norm": 0.663332249258245,
      "learning_rate": 6.640124289197845e-06,
      "loss": 0.412,
      "step": 48
    },
    {
      "epoch": 1.3831775700934579,
      "grad_norm": 0.735147105832247,
      "learning_rate": 6.481377904428171e-06,
      "loss": 0.4639,
      "step": 49
    },
    {
      "epoch": 1.411214953271028,
      "grad_norm": 0.7169861474373482,
      "learning_rate": 6.3209770093564315e-06,
      "loss": 0.4511,
      "step": 50
    },
    {
      "epoch": 1.439252336448598,
      "grad_norm": 0.7241007166406079,
      "learning_rate": 6.1591007513376425e-06,
      "loss": 0.4204,
      "step": 51
    },
    {
      "epoch": 1.4672897196261683,
      "grad_norm": 0.7785664265979848,
      "learning_rate": 5.995929925519181e-06,
      "loss": 0.4717,
      "step": 52
    },
    {
      "epoch": 1.4953271028037383,
      "grad_norm": 0.643031905575638,
      "learning_rate": 5.831646772915651e-06,
      "loss": 0.451,
      "step": 53
    },
    {
      "epoch": 1.5233644859813085,
      "grad_norm": 0.6622611979477618,
      "learning_rate": 5.666434776868895e-06,
      "loss": 0.4598,
      "step": 54
    },
    {
      "epoch": 1.5514018691588785,
      "grad_norm": 0.6681223638603929,
      "learning_rate": 5.500478458120493e-06,
      "loss": 0.4471,
      "step": 55
    },
    {
      "epoch": 1.5794392523364484,
      "grad_norm": 0.6742446596507364,
      "learning_rate": 5.3339631687256085e-06,
      "loss": 0.4323,
      "step": 56
    },
    {
      "epoch": 1.6074766355140186,
      "grad_norm": 0.6011350150512597,
      "learning_rate": 5.1670748850383734e-06,
      "loss": 0.421,
      "step": 57
    },
    {
      "epoch": 1.6355140186915889,
      "grad_norm": 0.6177912691346672,
      "learning_rate": 5e-06,
      "loss": 0.3956,
      "step": 58
    },
    {
      "epoch": 1.6635514018691588,
      "grad_norm": 0.6868626354051661,
      "learning_rate": 4.832925114961629e-06,
      "loss": 0.4661,
      "step": 59
    },
    {
      "epoch": 1.6915887850467288,
      "grad_norm": 0.6421560332745099,
      "learning_rate": 4.666036831274392e-06,
      "loss": 0.4704,
      "step": 60
    },
    {
      "epoch": 1.719626168224299,
      "grad_norm": 0.7187797397726619,
      "learning_rate": 4.499521541879508e-06,
      "loss": 0.465,
      "step": 61
    },
    {
      "epoch": 1.7476635514018692,
      "grad_norm": 0.5867907128348048,
      "learning_rate": 4.333565223131107e-06,
      "loss": 0.3283,
      "step": 62
    },
    {
      "epoch": 1.7757009345794392,
      "grad_norm": 0.662616949214837,
      "learning_rate": 4.1683532270843505e-06,
      "loss": 0.4216,
      "step": 63
    },
    {
      "epoch": 1.8037383177570092,
      "grad_norm": 0.6719507669829111,
      "learning_rate": 4.004070074480821e-06,
      "loss": 0.4443,
      "step": 64
    },
    {
      "epoch": 1.8317757009345794,
      "grad_norm": 0.6464555389487582,
      "learning_rate": 3.840899248662358e-06,
      "loss": 0.4901,
      "step": 65
    },
    {
      "epoch": 1.8598130841121496,
      "grad_norm": 0.642004590459993,
      "learning_rate": 3.6790229906435706e-06,
      "loss": 0.4687,
      "step": 66
    },
    {
      "epoch": 1.8878504672897196,
      "grad_norm": 0.6811855386223414,
      "learning_rate": 3.518622095571831e-06,
      "loss": 0.5103,
      "step": 67
    },
    {
      "epoch": 1.9158878504672896,
      "grad_norm": 0.6780639408715025,
      "learning_rate": 3.3598757108021546e-06,
      "loss": 0.4563,
      "step": 68
    },
    {
      "epoch": 1.9439252336448598,
      "grad_norm": 0.6096518872897251,
      "learning_rate": 3.202961135812437e-06,
      "loss": 0.4273,
      "step": 69
    },
    {
      "epoch": 1.97196261682243,
      "grad_norm": 0.6148567020386766,
      "learning_rate": 3.0480536241825263e-06,
      "loss": 0.3991,
      "step": 70
    },
    {
      "epoch": 2.0093457943925235,
      "grad_norm": 1.1093908775727706,
      "learning_rate": 2.8953261878583263e-06,
      "loss": 0.6552,
      "step": 71
    },
    {
      "epoch": 2.0373831775700935,
      "grad_norm": 0.5907034426928125,
      "learning_rate": 2.74494940391949e-06,
      "loss": 0.4244,
      "step": 72
    },
    {
      "epoch": 2.0654205607476634,
      "grad_norm": 0.6237639437264948,
      "learning_rate": 2.5970912240665815e-06,
      "loss": 0.3433,
      "step": 73
    },
    {
      "epoch": 2.0934579439252334,
      "grad_norm": 0.6456026273029044,
      "learning_rate": 2.4519167870404126e-06,
      "loss": 0.3633,
      "step": 74
    },
    {
      "epoch": 2.121495327102804,
      "grad_norm": 0.6114145984670124,
      "learning_rate": 2.309588234183137e-06,
      "loss": 0.3607,
      "step": 75
    },
    {
      "epoch": 2.149532710280374,
      "grad_norm": 0.5622408748138118,
      "learning_rate": 2.1702645283470238e-06,
      "loss": 0.3255,
      "step": 76
    },
    {
      "epoch": 2.177570093457944,
      "grad_norm": 0.6289348884143411,
      "learning_rate": 2.0341012763532243e-06,
      "loss": 0.3917,
      "step": 77
    },
    {
      "epoch": 2.205607476635514,
      "grad_norm": 2.11632857452312,
      "learning_rate": 1.9012505551987764e-06,
      "loss": 0.5284,
      "step": 78
    },
    {
      "epoch": 2.2336448598130842,
      "grad_norm": 0.6011421789250062,
      "learning_rate": 1.771860742205988e-06,
      "loss": 0.3717,
      "step": 79
    },
    {
      "epoch": 2.2616822429906542,
      "grad_norm": 0.605196953414431,
      "learning_rate": 1.646076349303884e-06,
      "loss": 0.3896,
      "step": 80
    },
    {
      "epoch": 2.289719626168224,
      "grad_norm": 0.6538497800730606,
      "learning_rate": 1.5240378616267887e-06,
      "loss": 0.3952,
      "step": 81
    },
    {
      "epoch": 2.317757009345794,
      "grad_norm": 0.6004986923958435,
      "learning_rate": 1.4058815806103542e-06,
      "loss": 0.4167,
      "step": 82
    },
    {
      "epoch": 2.3457943925233646,
      "grad_norm": 0.5871010729484297,
      "learning_rate": 1.2917394717602123e-06,
      "loss": 0.3395,
      "step": 83
    },
    {
      "epoch": 2.3738317757009346,
      "grad_norm": 0.6272792477288378,
      "learning_rate": 1.1817390172633402e-06,
      "loss": 0.3955,
      "step": 84
    },
    {
      "epoch": 2.4018691588785046,
      "grad_norm": 0.602988779712659,
      "learning_rate": 1.0760030736066952e-06,
      "loss": 0.3712,
      "step": 85
    },
    {
      "epoch": 2.4299065420560746,
      "grad_norm": 0.6514347127868433,
      "learning_rate": 9.746497343621857e-07,
      "loss": 0.439,
      "step": 86
    },
    {
      "epoch": 2.457943925233645,
      "grad_norm": 0.6171164966676075,
      "learning_rate": 8.777921982911996e-07,
      "loss": 0.3293,
      "step": 87
    },
    {
      "epoch": 2.485981308411215,
      "grad_norm": 0.5985061076335229,
      "learning_rate": 7.85538642916015e-07,
      "loss": 0.3554,
      "step": 88
    },
    {
      "epoch": 2.514018691588785,
      "grad_norm": 0.6023670543052161,
      "learning_rate": 6.979921036993042e-07,
      "loss": 0.3759,
      "step": 89
    },
    {
      "epoch": 2.542056074766355,
      "grad_norm": 0.6350504526148657,
      "learning_rate": 6.152503589666426e-07,
      "loss": 0.4048,
      "step": 90
    },
    {
      "epoch": 2.5700934579439254,
      "grad_norm": 0.6232837840764863,
      "learning_rate": 5.374058207005945e-07,
      "loss": 0.4454,
      "step": 91
    },
    {
      "epoch": 2.5981308411214954,
      "grad_norm": 0.6261955186268706,
      "learning_rate": 4.6454543132829653e-07,
      "loss": 0.3954,
      "step": 92
    },
    {
      "epoch": 2.6261682242990654,
      "grad_norm": 0.6079216763424581,
      "learning_rate": 3.9675056661785563e-07,
      "loss": 0.4013,
      "step": 93
    },
    {
      "epoch": 2.6542056074766354,
      "grad_norm": 0.6437379172509019,
      "learning_rate": 3.340969447919873e-07,
      "loss": 0.4624,
      "step": 94
    },
    {
      "epoch": 2.6822429906542054,
      "grad_norm": 0.6182485342353894,
      "learning_rate": 2.7665454196040665e-07,
      "loss": 0.3614,
      "step": 95
    },
    {
      "epoch": 2.710280373831776,
      "grad_norm": 0.6138213235934379,
      "learning_rate": 2.2448751396543788e-07,
      "loss": 0.3844,
      "step": 96
    },
    {
      "epoch": 2.7383177570093458,
      "grad_norm": 0.5790126523883554,
      "learning_rate": 1.776541247281177e-07,
      "loss": 0.3865,
      "step": 97
    },
    {
      "epoch": 2.7663551401869158,
      "grad_norm": 0.602742444058507,
      "learning_rate": 1.3620668117481471e-07,
      "loss": 0.4459,
      "step": 98
    },
    {
      "epoch": 2.794392523364486,
      "grad_norm": 0.6125089915071265,
      "learning_rate": 1.0019147481706626e-07,
      "loss": 0.4199,
      "step": 99
    },
    {
      "epoch": 2.822429906542056,
      "grad_norm": 0.6081244590976984,
      "learning_rate": 6.964873004985717e-08,
      "loss": 0.4009,
      "step": 100
    },
    {
      "epoch": 2.850467289719626,
      "grad_norm": 0.7145788800241012,
      "learning_rate": 4.461255922609986e-08,
      "loss": 0.3987,
      "step": 101
    },
    {
      "epoch": 2.878504672897196,
      "grad_norm": 0.6581593717216647,
      "learning_rate": 2.511092455747932e-08,
      "loss": 0.3866,
      "step": 102
    },
    {
      "epoch": 2.906542056074766,
      "grad_norm": 0.6290938268876005,
      "learning_rate": 1.1165606884234182e-08,
      "loss": 0.4225,
      "step": 103
    },
    {
      "epoch": 2.9345794392523366,
      "grad_norm": 0.6534972156634564,
      "learning_rate": 2.792181348726941e-09,
      "loss": 0.4279,
      "step": 104
    },
    {
      "epoch": 2.9626168224299065,
      "grad_norm": 0.58156649259114,
      "learning_rate": 0.0,
      "loss": 0.3581,
      "step": 105
    },
    {
      "epoch": 2.9626168224299065,
      "step": 105,
      "total_flos": 18524510396416.0,
      "train_loss": 0.47221575805119104,
      "train_runtime": 641.5687,
      "train_samples_per_second": 15.913,
      "train_steps_per_second": 0.164
    }
  ],
  "logging_steps": 1,
  "max_steps": 105,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 18524510396416.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}