| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.9988751406074241, | |
| "eval_steps": 500, | |
| "global_step": 777, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0012855535915153463, | |
| "grad_norm": 0.020836442708969116, | |
| "learning_rate": 0.0, | |
| "loss": 0.6262, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.0025711071830306926, | |
| "grad_norm": 0.022134091705083847, | |
| "learning_rate": 4.075900941810124e-06, | |
| "loss": 0.8688, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.003856660774546039, | |
| "grad_norm": 0.023200005292892456, | |
| "learning_rate": 6.46015014942309e-06, | |
| "loss": 0.7864, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.005142214366061385, | |
| "grad_norm": 0.02313530258834362, | |
| "learning_rate": 8.151801883620247e-06, | |
| "loss": 0.8897, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.0064277679575767315, | |
| "grad_norm": 0.020561356097459793, | |
| "learning_rate": 9.463948908766788e-06, | |
| "loss": 0.6479, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.007713321549092078, | |
| "grad_norm": 0.021870166063308716, | |
| "learning_rate": 1.0536051091233212e-05, | |
| "loss": 0.7501, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.008998875140607425, | |
| "grad_norm": 0.023460067808628082, | |
| "learning_rate": 1.1442500570809876e-05, | |
| "loss": 0.8672, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.01028442873212277, | |
| "grad_norm": 0.02368471957743168, | |
| "learning_rate": 1.222770282543037e-05, | |
| "loss": 0.8984, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.011569982323638118, | |
| "grad_norm": 0.020765064284205437, | |
| "learning_rate": 1.292030029884618e-05, | |
| "loss": 0.6547, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.012855535915153463, | |
| "grad_norm": 0.023947741836309433, | |
| "learning_rate": 1.3539849850576912e-05, | |
| "loss": 0.8205, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.01414108950666881, | |
| "grad_norm": 0.028013406321406364, | |
| "learning_rate": 1.4100300592531481e-05, | |
| "loss": 0.7891, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.015426643098184156, | |
| "grad_norm": 0.027111703529953957, | |
| "learning_rate": 1.4611952033043337e-05, | |
| "loss": 0.7678, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.0167121966896995, | |
| "grad_norm": 0.028518904000520706, | |
| "learning_rate": 1.5082625732282867e-05, | |
| "loss": 0.8091, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.01799775028121485, | |
| "grad_norm": 0.022600186988711357, | |
| "learning_rate": 1.551840151262e-05, | |
| "loss": 0.5875, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.019283303872730195, | |
| "grad_norm": 0.021580247208476067, | |
| "learning_rate": 1.5924099058189875e-05, | |
| "loss": 0.6714, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.02056885746424554, | |
| "grad_norm": 0.02505405619740486, | |
| "learning_rate": 1.6303603767240495e-05, | |
| "loss": 0.7534, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.021854411055760886, | |
| "grad_norm": 0.024437466636300087, | |
| "learning_rate": 1.6660093644266146e-05, | |
| "loss": 0.6945, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.023139964647276235, | |
| "grad_norm": 0.028052283450961113, | |
| "learning_rate": 1.6996201240656302e-05, | |
| "loss": 0.7076, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.02442551823879158, | |
| "grad_norm": 0.03632762283086777, | |
| "learning_rate": 1.7314131752785847e-05, | |
| "loss": 0.769, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.025711071830306926, | |
| "grad_norm": 0.02896072156727314, | |
| "learning_rate": 1.7615750792387035e-05, | |
| "loss": 0.8087, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.02699662542182227, | |
| "grad_norm": 0.034198954701423645, | |
| "learning_rate": 1.7902650720232966e-05, | |
| "loss": 0.8161, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.02828217901333762, | |
| "grad_norm": 0.03110469877719879, | |
| "learning_rate": 1.8176201534341607e-05, | |
| "loss": 0.8253, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.029567732604852966, | |
| "grad_norm": 0.039295781403779984, | |
| "learning_rate": 1.8437590437029225e-05, | |
| "loss": 0.9744, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.03085328619636831, | |
| "grad_norm": 0.03249296918511391, | |
| "learning_rate": 1.868785297485346e-05, | |
| "loss": 0.6455, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.032138839787883657, | |
| "grad_norm": 0.03106599487364292, | |
| "learning_rate": 1.8927897817533575e-05, | |
| "loss": 0.7005, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.033424393379399, | |
| "grad_norm": 0.03536655381321907, | |
| "learning_rate": 1.915852667409299e-05, | |
| "loss": 0.8004, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.03470994697091435, | |
| "grad_norm": 0.035472676157951355, | |
| "learning_rate": 1.9380450448269272e-05, | |
| "loss": 0.675, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.0359955005624297, | |
| "grad_norm": 0.03877939283847809, | |
| "learning_rate": 1.9594302454430122e-05, | |
| "loss": 0.6278, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.037281054153945045, | |
| "grad_norm": 0.041341230273246765, | |
| "learning_rate": 1.9800649313336155e-05, | |
| "loss": 0.914, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.03856660774546039, | |
| "grad_norm": 0.042063791304826736, | |
| "learning_rate": 2e-05, | |
| "loss": 0.7664, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.039852161336975736, | |
| "grad_norm": 0.04166961461305618, | |
| "learning_rate": 2e-05, | |
| "loss": 0.6324, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.04113771492849108, | |
| "grad_norm": 0.04256080463528633, | |
| "learning_rate": 2e-05, | |
| "loss": 0.6771, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.04242326852000643, | |
| "grad_norm": 0.042959265410900116, | |
| "learning_rate": 2e-05, | |
| "loss": 0.6018, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.04370882211152177, | |
| "grad_norm": 0.03880544751882553, | |
| "learning_rate": 2e-05, | |
| "loss": 0.5819, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.04499437570303712, | |
| "grad_norm": 0.0412827730178833, | |
| "learning_rate": 2e-05, | |
| "loss": 0.8133, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.04627992929455247, | |
| "grad_norm": 0.04274650663137436, | |
| "learning_rate": 2e-05, | |
| "loss": 0.6237, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.047565482886067816, | |
| "grad_norm": 0.04136871546506882, | |
| "learning_rate": 2e-05, | |
| "loss": 0.5851, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.04885103647758316, | |
| "grad_norm": 0.04220248758792877, | |
| "learning_rate": 2e-05, | |
| "loss": 0.7603, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.050136590069098506, | |
| "grad_norm": 0.039129678159952164, | |
| "learning_rate": 2e-05, | |
| "loss": 0.5206, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.05142214366061385, | |
| "grad_norm": 0.04173429682850838, | |
| "learning_rate": 2e-05, | |
| "loss": 0.6602, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.0527076972521292, | |
| "grad_norm": 0.040010105818510056, | |
| "learning_rate": 2e-05, | |
| "loss": 0.5964, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.05399325084364454, | |
| "grad_norm": 0.03841459006071091, | |
| "learning_rate": 2e-05, | |
| "loss": 0.6162, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.05527880443515989, | |
| "grad_norm": 0.04042840003967285, | |
| "learning_rate": 2e-05, | |
| "loss": 0.535, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.05656435802667524, | |
| "grad_norm": 0.040401577949523926, | |
| "learning_rate": 2e-05, | |
| "loss": 0.5685, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.057849911618190586, | |
| "grad_norm": 0.06742753833532333, | |
| "learning_rate": 2e-05, | |
| "loss": 0.8276, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.05913546520970593, | |
| "grad_norm": 0.040345244109630585, | |
| "learning_rate": 2e-05, | |
| "loss": 0.5988, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.06042101880122128, | |
| "grad_norm": 0.0415828563272953, | |
| "learning_rate": 2e-05, | |
| "loss": 0.6151, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.06170657239273662, | |
| "grad_norm": 0.041223231703042984, | |
| "learning_rate": 2e-05, | |
| "loss": 0.638, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.06299212598425197, | |
| "grad_norm": 0.03628067672252655, | |
| "learning_rate": 2e-05, | |
| "loss": 0.5031, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.06427767957576731, | |
| "grad_norm": 0.04399935156106949, | |
| "learning_rate": 2e-05, | |
| "loss": 0.6615, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.06556323316728266, | |
| "grad_norm": 0.04084352031350136, | |
| "learning_rate": 2e-05, | |
| "loss": 0.5703, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.066848786758798, | |
| "grad_norm": 0.039231687784194946, | |
| "learning_rate": 2e-05, | |
| "loss": 0.5325, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.06813434035031335, | |
| "grad_norm": 0.04078860580921173, | |
| "learning_rate": 2e-05, | |
| "loss": 0.5959, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.0694198939418287, | |
| "grad_norm": 0.03753922879695892, | |
| "learning_rate": 2e-05, | |
| "loss": 0.5652, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.07070544753334404, | |
| "grad_norm": 0.041337307542562485, | |
| "learning_rate": 2e-05, | |
| "loss": 0.5579, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.0719910011248594, | |
| "grad_norm": 0.03940434008836746, | |
| "learning_rate": 2e-05, | |
| "loss": 0.4821, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.07327655471637474, | |
| "grad_norm": 0.03760010376572609, | |
| "learning_rate": 2e-05, | |
| "loss": 0.4442, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.07456210830789009, | |
| "grad_norm": 0.042540181428194046, | |
| "learning_rate": 2e-05, | |
| "loss": 0.5285, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.07584766189940544, | |
| "grad_norm": 0.0457993820309639, | |
| "learning_rate": 2e-05, | |
| "loss": 0.7136, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.07713321549092078, | |
| "grad_norm": 0.033564481884241104, | |
| "learning_rate": 2e-05, | |
| "loss": 0.4617, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.07841876908243613, | |
| "grad_norm": 0.041546691209077835, | |
| "learning_rate": 2e-05, | |
| "loss": 0.6912, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.07970432267395147, | |
| "grad_norm": 0.03729071840643883, | |
| "learning_rate": 2e-05, | |
| "loss": 0.4584, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.08098987626546682, | |
| "grad_norm": 0.04159967973828316, | |
| "learning_rate": 2e-05, | |
| "loss": 0.5392, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.08227542985698216, | |
| "grad_norm": 0.03827968239784241, | |
| "learning_rate": 2e-05, | |
| "loss": 0.5548, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.08356098344849751, | |
| "grad_norm": 0.04405729100108147, | |
| "learning_rate": 2e-05, | |
| "loss": 0.6239, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.08484653704001285, | |
| "grad_norm": 0.03460558503866196, | |
| "learning_rate": 2e-05, | |
| "loss": 0.4405, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.0861320906315282, | |
| "grad_norm": 0.030664170160889626, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3396, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.08741764422304354, | |
| "grad_norm": 0.0376565083861351, | |
| "learning_rate": 2e-05, | |
| "loss": 0.5822, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.08870319781455889, | |
| "grad_norm": 0.0384797677397728, | |
| "learning_rate": 2e-05, | |
| "loss": 0.5402, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.08998875140607424, | |
| "grad_norm": 0.030342888087034225, | |
| "learning_rate": 2e-05, | |
| "loss": 0.371, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.09127430499758958, | |
| "grad_norm": 0.0449620746076107, | |
| "learning_rate": 2e-05, | |
| "loss": 0.5723, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.09255985858910494, | |
| "grad_norm": 0.03808669000864029, | |
| "learning_rate": 2e-05, | |
| "loss": 0.4842, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.09384541218062029, | |
| "grad_norm": 0.03985065966844559, | |
| "learning_rate": 2e-05, | |
| "loss": 0.4957, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.09513096577213563, | |
| "grad_norm": 0.030943365767598152, | |
| "learning_rate": 2e-05, | |
| "loss": 0.4595, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.09641651936365098, | |
| "grad_norm": 0.03418966010212898, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3757, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.09770207295516632, | |
| "grad_norm": 0.033448606729507446, | |
| "learning_rate": 2e-05, | |
| "loss": 0.4325, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.09898762654668167, | |
| "grad_norm": 0.039748664945364, | |
| "learning_rate": 2e-05, | |
| "loss": 0.507, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.10027318013819701, | |
| "grad_norm": 0.04277816414833069, | |
| "learning_rate": 2e-05, | |
| "loss": 0.6296, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.10155873372971236, | |
| "grad_norm": 0.029562752693891525, | |
| "learning_rate": 2e-05, | |
| "loss": 0.395, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.1028442873212277, | |
| "grad_norm": 0.029590601101517677, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3807, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.10412984091274305, | |
| "grad_norm": 0.031173471361398697, | |
| "learning_rate": 2e-05, | |
| "loss": 0.4175, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.1054153945042584, | |
| "grad_norm": 0.03821694105863571, | |
| "learning_rate": 2e-05, | |
| "loss": 0.5547, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.10670094809577374, | |
| "grad_norm": 0.02932704985141754, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3954, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.10798650168728909, | |
| "grad_norm": 0.030441921204328537, | |
| "learning_rate": 2e-05, | |
| "loss": 0.4564, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.10927205527880443, | |
| "grad_norm": 0.03350207954645157, | |
| "learning_rate": 2e-05, | |
| "loss": 0.4477, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.11055760887031978, | |
| "grad_norm": 0.030435308814048767, | |
| "learning_rate": 2e-05, | |
| "loss": 0.4292, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.11184316246183512, | |
| "grad_norm": 0.03452485054731369, | |
| "learning_rate": 2e-05, | |
| "loss": 0.4572, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.11312871605335048, | |
| "grad_norm": 0.029849708080291748, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3826, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.11441426964486583, | |
| "grad_norm": 0.026589911431074142, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3335, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.11569982323638117, | |
| "grad_norm": 0.03767862543463707, | |
| "learning_rate": 2e-05, | |
| "loss": 0.5377, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.11698537682789652, | |
| "grad_norm": 0.030503496527671814, | |
| "learning_rate": 2e-05, | |
| "loss": 0.4019, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.11827093041941186, | |
| "grad_norm": 0.02843611314892769, | |
| "learning_rate": 2e-05, | |
| "loss": 0.378, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.11955648401092721, | |
| "grad_norm": 0.02735988050699234, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3842, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.12084203760244255, | |
| "grad_norm": 0.03628378361463547, | |
| "learning_rate": 2e-05, | |
| "loss": 0.502, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.1221275911939579, | |
| "grad_norm": 0.029980337247252464, | |
| "learning_rate": 2e-05, | |
| "loss": 0.4177, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.12341314478547324, | |
| "grad_norm": 0.03486626222729683, | |
| "learning_rate": 2e-05, | |
| "loss": 0.495, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.12469869837698859, | |
| "grad_norm": 0.03005075454711914, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3618, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.12598425196850394, | |
| "grad_norm": 0.03018985688686371, | |
| "learning_rate": 2e-05, | |
| "loss": 0.4078, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.1272698055600193, | |
| "grad_norm": 0.03108677826821804, | |
| "learning_rate": 2e-05, | |
| "loss": 0.4583, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.12855535915153463, | |
| "grad_norm": 0.029582438990473747, | |
| "learning_rate": 2e-05, | |
| "loss": 0.4142, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.12984091274304999, | |
| "grad_norm": 0.02979620173573494, | |
| "learning_rate": 2e-05, | |
| "loss": 0.4535, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.13112646633456532, | |
| "grad_norm": 0.032250065356492996, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3805, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.13241201992608068, | |
| "grad_norm": 0.03306899964809418, | |
| "learning_rate": 2e-05, | |
| "loss": 0.4351, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.133697573517596, | |
| "grad_norm": 0.023130670189857483, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2417, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.13498312710911137, | |
| "grad_norm": 0.03372225537896156, | |
| "learning_rate": 2e-05, | |
| "loss": 0.4703, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.1362686807006267, | |
| "grad_norm": 0.02907857671380043, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3437, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.13755423429214206, | |
| "grad_norm": 0.03021407686173916, | |
| "learning_rate": 2e-05, | |
| "loss": 0.4327, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.1388397878836574, | |
| "grad_norm": 0.027038615196943283, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3652, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.14012534147517275, | |
| "grad_norm": 0.02982942759990692, | |
| "learning_rate": 2e-05, | |
| "loss": 0.345, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.14141089506668808, | |
| "grad_norm": 0.0561259388923645, | |
| "learning_rate": 2e-05, | |
| "loss": 0.5073, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.14269644865820344, | |
| "grad_norm": 0.024736687541007996, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3149, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.1439820022497188, | |
| "grad_norm": 0.02275976352393627, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3147, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.14526755584123413, | |
| "grad_norm": 0.030464742332696915, | |
| "learning_rate": 2e-05, | |
| "loss": 0.4512, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.1465531094327495, | |
| "grad_norm": 0.026887530460953712, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3679, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.14783866302426482, | |
| "grad_norm": 0.03605503961443901, | |
| "learning_rate": 2e-05, | |
| "loss": 0.4392, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.14912421661578018, | |
| "grad_norm": 0.02638978883624077, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3484, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.1504097702072955, | |
| "grad_norm": 0.03650350496172905, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3978, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.15169532379881087, | |
| "grad_norm": 0.022277837619185448, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2525, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.1529808773903262, | |
| "grad_norm": 0.021412434056401253, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2922, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.15426643098184156, | |
| "grad_norm": 0.029154105111956596, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3864, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.1555519845733569, | |
| "grad_norm": 0.024072440341114998, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2467, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.15683753816487225, | |
| "grad_norm": 0.019447140395641327, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2086, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.15812309175638758, | |
| "grad_norm": 0.035536400973796844, | |
| "learning_rate": 2e-05, | |
| "loss": 0.489, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.15940864534790294, | |
| "grad_norm": 0.026226134970784187, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3502, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.16069419893941828, | |
| "grad_norm": 0.029284900054335594, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3683, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.16197975253093364, | |
| "grad_norm": 0.026484966278076172, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3686, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.16326530612244897, | |
| "grad_norm": 0.03296555206179619, | |
| "learning_rate": 2e-05, | |
| "loss": 0.4598, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.16455085971396433, | |
| "grad_norm": 0.03217398375272751, | |
| "learning_rate": 2e-05, | |
| "loss": 0.4292, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.16583641330547969, | |
| "grad_norm": 0.02639828808605671, | |
| "learning_rate": 2e-05, | |
| "loss": 0.324, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.16712196689699502, | |
| "grad_norm": 0.025398138910531998, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3565, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.16840752048851038, | |
| "grad_norm": 0.026609797030687332, | |
| "learning_rate": 2e-05, | |
| "loss": 0.326, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.1696930740800257, | |
| "grad_norm": 0.029938040301203728, | |
| "learning_rate": 2e-05, | |
| "loss": 0.4149, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.17097862767154107, | |
| "grad_norm": 0.02608969807624817, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3774, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.1722641812630564, | |
| "grad_norm": 0.02580363303422928, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2944, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.17354973485457176, | |
| "grad_norm": 0.029851458966732025, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3316, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.1748352884460871, | |
| "grad_norm": 0.02928406558930874, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3548, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.17612084203760245, | |
| "grad_norm": 0.030875032767653465, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3617, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.17740639562911778, | |
| "grad_norm": 0.026721350848674774, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3799, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.17869194922063314, | |
| "grad_norm": 0.03269115090370178, | |
| "learning_rate": 2e-05, | |
| "loss": 0.4324, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.17997750281214847, | |
| "grad_norm": 0.022154508158564568, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2744, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.18126305640366383, | |
| "grad_norm": 0.022251179441809654, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2886, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.18254860999517916, | |
| "grad_norm": 0.03386593237519264, | |
| "learning_rate": 2e-05, | |
| "loss": 0.473, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.18383416358669452, | |
| "grad_norm": 0.02578306384384632, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3224, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.18511971717820988, | |
| "grad_norm": 0.027509864419698715, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3224, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.1864052707697252, | |
| "grad_norm": 0.02819378860294819, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3176, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.18769082436124057, | |
| "grad_norm": 0.028061147779226303, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3494, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.1889763779527559, | |
| "grad_norm": 0.032399386167526245, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3647, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.19026193154427126, | |
| "grad_norm": 0.028246790170669556, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3366, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.1915474851357866, | |
| "grad_norm": 0.03099609911441803, | |
| "learning_rate": 2e-05, | |
| "loss": 0.4034, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.19283303872730195, | |
| "grad_norm": 0.03750993683934212, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3395, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.19411859231881728, | |
| "grad_norm": 0.0326780304312706, | |
| "learning_rate": 2e-05, | |
| "loss": 0.4482, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.19540414591033264, | |
| "grad_norm": 0.033816393464803696, | |
| "learning_rate": 2e-05, | |
| "loss": 0.4504, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.19668969950184798, | |
| "grad_norm": 0.026754887774586678, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3334, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.19797525309336333, | |
| "grad_norm": 0.02957574650645256, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3698, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.19926080668487867, | |
| "grad_norm": 0.02848845347762108, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3001, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.20054636027639403, | |
| "grad_norm": 0.03636415675282478, | |
| "learning_rate": 2e-05, | |
| "loss": 0.4872, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.20183191386790936, | |
| "grad_norm": 0.018864037469029427, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2086, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 0.20311746745942472, | |
| "grad_norm": 0.027126725763082504, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3181, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.20440302105094005, | |
| "grad_norm": 0.025296056643128395, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3169, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.2056885746424554, | |
| "grad_norm": 0.035376112908124924, | |
| "learning_rate": 2e-05, | |
| "loss": 0.4219, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.20697412823397077, | |
| "grad_norm": 0.030744420364499092, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3818, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 0.2082596818254861, | |
| "grad_norm": 0.03273791819810867, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3823, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.20954523541700146, | |
| "grad_norm": 0.030423806980252266, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3451, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 0.2108307890085168, | |
| "grad_norm": 0.029618561267852783, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3604, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.21211634260003215, | |
| "grad_norm": 0.030883729457855225, | |
| "learning_rate": 2e-05, | |
| "loss": 0.401, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.21340189619154748, | |
| "grad_norm": 0.028922105208039284, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3305, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.21468744978306284, | |
| "grad_norm": 0.033665966242551804, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3175, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 0.21597300337457817, | |
| "grad_norm": 0.035460278391838074, | |
| "learning_rate": 2e-05, | |
| "loss": 0.4509, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.21725855696609353, | |
| "grad_norm": 0.026533829048275948, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2403, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 0.21854411055760886, | |
| "grad_norm": 0.029200293123722076, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2794, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.21982966414912422, | |
| "grad_norm": 0.027879290282726288, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2995, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 0.22111521774063955, | |
| "grad_norm": 0.027549387887120247, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2803, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.2224007713321549, | |
| "grad_norm": 0.03113819658756256, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2479, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 0.22368632492367024, | |
| "grad_norm": 0.024273231625556946, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2705, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.2249718785151856, | |
| "grad_norm": 0.02970244735479355, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3266, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.22625743210670096, | |
| "grad_norm": 0.028792355209589005, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3311, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.2275429856982163, | |
| "grad_norm": 0.029121607542037964, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2814, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 0.22882853928973165, | |
| "grad_norm": 0.029099591076374054, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3065, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.23011409288124698, | |
| "grad_norm": 0.02833685837686062, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3459, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 0.23139964647276234, | |
| "grad_norm": 0.03676662966609001, | |
| "learning_rate": 2e-05, | |
| "loss": 0.4179, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.23268520006427768, | |
| "grad_norm": 0.02846740558743477, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2879, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 0.23397075365579303, | |
| "grad_norm": 0.030531803146004677, | |
| "learning_rate": 2e-05, | |
| "loss": 0.362, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.23525630724730837, | |
| "grad_norm": 0.034853462129831314, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3814, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 0.23654186083882373, | |
| "grad_norm": 0.03336189687252045, | |
| "learning_rate": 2e-05, | |
| "loss": 0.4272, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.23782741443033906, | |
| "grad_norm": 0.03514046594500542, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3432, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.23911296802185442, | |
| "grad_norm": 0.032468028366565704, | |
| "learning_rate": 2e-05, | |
| "loss": 0.405, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.24039852161336975, | |
| "grad_norm": 0.026813151314854622, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2746, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 0.2416840752048851, | |
| "grad_norm": 0.03329463675618172, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3566, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.24296962879640044, | |
| "grad_norm": 0.03253549337387085, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3951, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 0.2442551823879158, | |
| "grad_norm": 0.03337908163666725, | |
| "learning_rate": 2e-05, | |
| "loss": 0.4022, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.24554073597943113, | |
| "grad_norm": 0.029503121972084045, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3154, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 0.2468262895709465, | |
| "grad_norm": 0.03800208494067192, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3716, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.24811184316246185, | |
| "grad_norm": 0.04471494257450104, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3618, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 0.24939739675397718, | |
| "grad_norm": 0.03158828616142273, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3035, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.25068295034549254, | |
| "grad_norm": 0.030343275517225266, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3177, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.25196850393700787, | |
| "grad_norm": 0.027333417907357216, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2688, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.2532540575285232, | |
| "grad_norm": 0.034231096506118774, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2827, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 0.2545396111200386, | |
| "grad_norm": 0.042767249047756195, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3617, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.2558251647115539, | |
| "grad_norm": 0.04363776370882988, | |
| "learning_rate": 2e-05, | |
| "loss": 0.428, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 0.25711071830306925, | |
| "grad_norm": 0.03701059892773628, | |
| "learning_rate": 2e-05, | |
| "loss": 0.4258, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.2583962718945846, | |
| "grad_norm": 0.03248538821935654, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3127, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 0.25968182548609997, | |
| "grad_norm": 0.02792442962527275, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2616, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.2609673790776153, | |
| "grad_norm": 0.02882961928844452, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2822, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 0.26225293266913063, | |
| "grad_norm": 0.02498476952314377, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2291, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.26353848626064597, | |
| "grad_norm": 0.0262466911226511, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2084, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.26482403985216135, | |
| "grad_norm": 0.031161930412054062, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2977, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.2661095934436767, | |
| "grad_norm": 0.03852604702115059, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3606, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 0.267395147035192, | |
| "grad_norm": 0.03641024976968765, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3855, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.2686807006267074, | |
| "grad_norm": 0.03774799406528473, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3458, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 0.26996625421822273, | |
| "grad_norm": 0.04067372530698776, | |
| "learning_rate": 2e-05, | |
| "loss": 0.4515, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.27125180780973807, | |
| "grad_norm": 0.03964482620358467, | |
| "learning_rate": 2e-05, | |
| "loss": 0.4272, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 0.2725373614012534, | |
| "grad_norm": 0.02894040197134018, | |
| "learning_rate": 2e-05, | |
| "loss": 0.256, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.2738229149927688, | |
| "grad_norm": 0.036077771335840225, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3755, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 0.2751084685842841, | |
| "grad_norm": 0.032988108694553375, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3135, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.27639402217579945, | |
| "grad_norm": 0.02877802960574627, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2762, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.2776795757673148, | |
| "grad_norm": 0.03700711205601692, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3022, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.27896512935883017, | |
| "grad_norm": 0.03660174459218979, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3265, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 0.2802506829503455, | |
| "grad_norm": 0.034895338118076324, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3337, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.28153623654186083, | |
| "grad_norm": 0.029524167999625206, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2872, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 0.28282179013337616, | |
| "grad_norm": 0.037102892994880676, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3484, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.28410734372489155, | |
| "grad_norm": 0.02568918839097023, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2352, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 0.2853928973164069, | |
| "grad_norm": 0.03680694103240967, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3156, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.2866784509079222, | |
| "grad_norm": 0.03616785258054733, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3435, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 0.2879640044994376, | |
| "grad_norm": 0.03019794449210167, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2342, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.28924955809095293, | |
| "grad_norm": 0.029189620167016983, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2622, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.29053511168246826, | |
| "grad_norm": 0.03722851350903511, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3245, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.2918206652739836, | |
| "grad_norm": 0.028928019106388092, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2444, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 0.293106218865499, | |
| "grad_norm": 0.03965122997760773, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2914, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.2943917724570143, | |
| "grad_norm": 0.03618443012237549, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2944, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 0.29567732604852964, | |
| "grad_norm": 0.04255329445004463, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3803, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.296962879640045, | |
| "grad_norm": 0.03631114959716797, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3529, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 0.29824843323156036, | |
| "grad_norm": 0.0347764790058136, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2967, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.2995339868230757, | |
| "grad_norm": 0.03510100021958351, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3316, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 0.300819540414591, | |
| "grad_norm": 0.03378084674477577, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3318, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.30210509400610636, | |
| "grad_norm": 0.035719968378543854, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2408, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.30339064759762174, | |
| "grad_norm": 0.03345809876918793, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2553, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.3046762011891371, | |
| "grad_norm": 0.03555387631058693, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2032, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 0.3059617547806524, | |
| "grad_norm": 0.037534430623054504, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3482, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.30724730837216774, | |
| "grad_norm": 0.03810921311378479, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2931, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 0.3085328619636831, | |
| "grad_norm": 0.03767091780900955, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3031, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.30981841555519846, | |
| "grad_norm": 0.04636585712432861, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3323, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 0.3111039691467138, | |
| "grad_norm": 0.02405642159283161, | |
| "learning_rate": 2e-05, | |
| "loss": 0.1964, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.3123895227382292, | |
| "grad_norm": 0.03820343688130379, | |
| "learning_rate": 2e-05, | |
| "loss": 0.265, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 0.3136750763297445, | |
| "grad_norm": 0.04235352948307991, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3761, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.31496062992125984, | |
| "grad_norm": 0.02953983098268509, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2458, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.31624618351277517, | |
| "grad_norm": 0.031593743711709976, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2278, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.31753173710429056, | |
| "grad_norm": 0.033025920391082764, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2967, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 0.3188172906958059, | |
| "grad_norm": 0.03608924522995949, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3211, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.3201028442873212, | |
| "grad_norm": 0.029520737007260323, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2659, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 0.32138839787883655, | |
| "grad_norm": 0.043838564306497574, | |
| "learning_rate": 2e-05, | |
| "loss": 0.4001, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.32267395147035194, | |
| "grad_norm": 0.03314085677266121, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2669, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 0.32395950506186727, | |
| "grad_norm": 0.03647439181804657, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3063, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.3252450586533826, | |
| "grad_norm": 0.03778000921010971, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3437, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 0.32653061224489793, | |
| "grad_norm": 0.035549599677324295, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3332, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.3278161658364133, | |
| "grad_norm": 0.033758629113435745, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2372, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.32910171942792865, | |
| "grad_norm": 0.04042687267065048, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2862, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.330387273019444, | |
| "grad_norm": 0.032794684171676636, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2631, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 0.33167282661095937, | |
| "grad_norm": 0.03374920412898064, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2703, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.3329583802024747, | |
| "grad_norm": 0.03981158137321472, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3021, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 0.33424393379399003, | |
| "grad_norm": 0.034164056181907654, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3245, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.33552948738550537, | |
| "grad_norm": 0.03673673793673515, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3044, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 0.33681504097702075, | |
| "grad_norm": 0.04251427203416824, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3132, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.3381005945685361, | |
| "grad_norm": 0.055292125791311264, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3668, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 0.3393861481600514, | |
| "grad_norm": 0.03982202708721161, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3389, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.34067170175156675, | |
| "grad_norm": 0.03548764809966087, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2439, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.34195725534308213, | |
| "grad_norm": 0.04806696996092796, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3923, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.34324280893459747, | |
| "grad_norm": 0.036050595343112946, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2605, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 0.3445283625261128, | |
| "grad_norm": 0.032735515385866165, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2451, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.34581391611762813, | |
| "grad_norm": 0.039695464074611664, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3072, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 0.3470994697091435, | |
| "grad_norm": 0.027333933860063553, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2099, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.34838502330065885, | |
| "grad_norm": 0.03149592876434326, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2613, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 0.3496705768921742, | |
| "grad_norm": 0.031215226277709007, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2833, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.35095613048368957, | |
| "grad_norm": 0.04059711471199989, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3666, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 0.3522416840752049, | |
| "grad_norm": 0.04247285798192024, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3758, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.35352723766672023, | |
| "grad_norm": 0.034378454089164734, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2519, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.35481279125823556, | |
| "grad_norm": 0.037096619606018066, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3256, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.35609834484975095, | |
| "grad_norm": 0.03536511957645416, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2596, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 0.3573838984412663, | |
| "grad_norm": 0.046086303889751434, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3132, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.3586694520327816, | |
| "grad_norm": 0.03302552178502083, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2839, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 0.35995500562429694, | |
| "grad_norm": 0.03423115238547325, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2678, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.36124055921581233, | |
| "grad_norm": 0.03363805264234543, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2243, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 0.36252611280732766, | |
| "grad_norm": 0.03901209309697151, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2956, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.363811666398843, | |
| "grad_norm": 0.03081115335226059, | |
| "learning_rate": 2e-05, | |
| "loss": 0.213, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 0.3650972199903583, | |
| "grad_norm": 0.04130322486162186, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3161, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.3663827735818737, | |
| "grad_norm": 0.03694218024611473, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3228, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.36766832717338904, | |
| "grad_norm": 0.048961639404296875, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3688, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.3689538807649044, | |
| "grad_norm": 0.03482965752482414, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2797, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 0.37023943435641976, | |
| "grad_norm": 0.043517641723155975, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3395, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.3715249879479351, | |
| "grad_norm": 0.03916122019290924, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3168, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 0.3728105415394504, | |
| "grad_norm": 0.03970535099506378, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3523, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.37409609513096576, | |
| "grad_norm": 0.043576546013355255, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3974, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 0.37538164872248114, | |
| "grad_norm": 0.03478504344820976, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2663, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.3766672023139965, | |
| "grad_norm": 0.0442640446126461, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2685, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 0.3779527559055118, | |
| "grad_norm": 0.04135148599743843, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3765, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.37923830949702714, | |
| "grad_norm": 0.03744332864880562, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3693, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.3805238630885425, | |
| "grad_norm": 0.038954440504312515, | |
| "learning_rate": 2e-05, | |
| "loss": 0.289, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.38180941668005786, | |
| "grad_norm": 0.031730618327856064, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2271, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 0.3830949702715732, | |
| "grad_norm": 0.04433518648147583, | |
| "learning_rate": 2e-05, | |
| "loss": 0.4079, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.3843805238630885, | |
| "grad_norm": 0.04384070262312889, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3005, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 0.3856660774546039, | |
| "grad_norm": 0.03004288114607334, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2113, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.38695163104611924, | |
| "grad_norm": 0.0353570394217968, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2198, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 0.38823718463763457, | |
| "grad_norm": 0.04267432913184166, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3431, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.3895227382291499, | |
| "grad_norm": 0.04084617272019386, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3099, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 0.3908082918206653, | |
| "grad_norm": 0.059954188764095306, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3891, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.3920938454121806, | |
| "grad_norm": 0.03467090055346489, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2383, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.39337939900369595, | |
| "grad_norm": 0.03164566680788994, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2079, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.39466495259521134, | |
| "grad_norm": 0.048123132437467575, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3726, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 0.39595050618672667, | |
| "grad_norm": 0.03534458950161934, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2651, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.397236059778242, | |
| "grad_norm": 0.03836483508348465, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3101, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 0.39852161336975733, | |
| "grad_norm": 0.047910891473293304, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3234, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.3998071669612727, | |
| "grad_norm": 0.027741173282265663, | |
| "learning_rate": 2e-05, | |
| "loss": 0.1632, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 0.40109272055278805, | |
| "grad_norm": 0.0344574935734272, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2463, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.4023782741443034, | |
| "grad_norm": 0.032118018716573715, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2298, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 0.4036638277358187, | |
| "grad_norm": 0.040490612387657166, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3247, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.4049493813273341, | |
| "grad_norm": 0.03369493409991264, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2088, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.40623493491884943, | |
| "grad_norm": 0.04419386386871338, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3354, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.40752048851036476, | |
| "grad_norm": 0.04048989340662956, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2988, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 0.4088060421018801, | |
| "grad_norm": 0.040915414690971375, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2315, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.4100915956933955, | |
| "grad_norm": 0.03020886704325676, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2137, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 0.4113771492849108, | |
| "grad_norm": 0.0413849912583828, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3479, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.41266270287642615, | |
| "grad_norm": 0.04639044404029846, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3689, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 0.41394825646794153, | |
| "grad_norm": 0.044351786375045776, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3488, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.41523381005945686, | |
| "grad_norm": 0.030558589845895767, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2211, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 0.4165193636509722, | |
| "grad_norm": 0.03329205513000488, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2282, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.41780491724248753, | |
| "grad_norm": 0.04240158200263977, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2571, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.4190904708340029, | |
| "grad_norm": 0.040866266936063766, | |
| "learning_rate": 2e-05, | |
| "loss": 0.289, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.42037602442551825, | |
| "grad_norm": 0.04475086182355881, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2889, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 0.4216615780170336, | |
| "grad_norm": 0.03587472438812256, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2452, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.4229471316085489, | |
| "grad_norm": 0.04346352815628052, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3751, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 0.4242326852000643, | |
| "grad_norm": 0.03417763113975525, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2825, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.42551823879157963, | |
| "grad_norm": 0.030223989859223366, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2339, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 0.42680379238309496, | |
| "grad_norm": 0.0342961922287941, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2616, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.4280893459746103, | |
| "grad_norm": 0.04207473620772362, | |
| "learning_rate": 2e-05, | |
| "loss": 0.265, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 0.4293748995661257, | |
| "grad_norm": 0.03148888424038887, | |
| "learning_rate": 2e-05, | |
| "loss": 0.1792, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.430660453157641, | |
| "grad_norm": 0.039937492460012436, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2502, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.43194600674915634, | |
| "grad_norm": 0.03943054750561714, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2733, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.43323156034067173, | |
| "grad_norm": 0.03569771721959114, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2099, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 0.43451711393218706, | |
| "grad_norm": 0.036599624902009964, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2478, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.4358026675237024, | |
| "grad_norm": 0.054707758128643036, | |
| "learning_rate": 2e-05, | |
| "loss": 0.4257, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 0.4370882211152177, | |
| "grad_norm": 0.0450870580971241, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3091, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.4383737747067331, | |
| "grad_norm": 0.03818565234541893, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2781, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 0.43965932829824844, | |
| "grad_norm": 0.03722561523318291, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2602, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.4409448818897638, | |
| "grad_norm": 0.038348764181137085, | |
| "learning_rate": 2e-05, | |
| "loss": 0.286, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 0.4422304354812791, | |
| "grad_norm": 0.02572775073349476, | |
| "learning_rate": 2e-05, | |
| "loss": 0.1886, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.4435159890727945, | |
| "grad_norm": 0.03972122073173523, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2756, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.4448015426643098, | |
| "grad_norm": 0.03696167469024658, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2526, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.44608709625582516, | |
| "grad_norm": 0.03587668761610985, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2044, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 0.4473726498473405, | |
| "grad_norm": 0.03959975019097328, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3007, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.4486582034388559, | |
| "grad_norm": 0.03879138454794884, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2382, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 0.4499437570303712, | |
| "grad_norm": 0.05302846059203148, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3375, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.45122931062188654, | |
| "grad_norm": 0.039411693811416626, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2662, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 0.4525148642134019, | |
| "grad_norm": 0.03571093827486038, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2054, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.45380041780491726, | |
| "grad_norm": 0.0486789233982563, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3314, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 0.4550859713964326, | |
| "grad_norm": 0.037670183926820755, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2484, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.4563715249879479, | |
| "grad_norm": 0.056887123733758926, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3562, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.4576570785794633, | |
| "grad_norm": 0.04562405124306679, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2869, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.45894263217097864, | |
| "grad_norm": 0.040491264313459396, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3541, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 0.46022818576249397, | |
| "grad_norm": 0.04283326864242554, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2674, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.4615137393540093, | |
| "grad_norm": 0.05063975229859352, | |
| "learning_rate": 2e-05, | |
| "loss": 0.4013, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 0.4627992929455247, | |
| "grad_norm": 0.037555571645498276, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2419, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.46408484653704, | |
| "grad_norm": 0.036944594234228134, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2426, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 0.46537040012855535, | |
| "grad_norm": 0.05010130628943443, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2996, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.4666559537200707, | |
| "grad_norm": 0.0335206501185894, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2451, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 0.46794150731158607, | |
| "grad_norm": 0.052481383085250854, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3812, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.4692270609031014, | |
| "grad_norm": 0.04185234755277634, | |
| "learning_rate": 2e-05, | |
| "loss": 0.274, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.47051261449461673, | |
| "grad_norm": 0.03707558289170265, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2505, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.47179816808613206, | |
| "grad_norm": 0.060728251934051514, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3279, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 0.47308372167764745, | |
| "grad_norm": 0.031999371945858, | |
| "learning_rate": 2e-05, | |
| "loss": 0.1866, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.4743692752691628, | |
| "grad_norm": 0.044399287551641464, | |
| "learning_rate": 2e-05, | |
| "loss": 0.249, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 0.4756548288606781, | |
| "grad_norm": 0.05057983100414276, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3612, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.4769403824521935, | |
| "grad_norm": 0.039979059249162674, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2684, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 0.47822593604370883, | |
| "grad_norm": 0.03305087611079216, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2164, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.47951148963522416, | |
| "grad_norm": 0.045574892312288284, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3127, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 0.4807970432267395, | |
| "grad_norm": 0.05269627645611763, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3315, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.4820825968182549, | |
| "grad_norm": 0.06162478029727936, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3347, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.4833681504097702, | |
| "grad_norm": 0.04428340122103691, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2794, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.48465370400128555, | |
| "grad_norm": 0.04249970242381096, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2781, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 0.4859392575928009, | |
| "grad_norm": 0.04270468279719353, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2878, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.48722481118431626, | |
| "grad_norm": 0.036853183060884476, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2548, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 0.4885103647758316, | |
| "grad_norm": 0.03981437534093857, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2743, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.4897959183673469, | |
| "grad_norm": 0.04621482267975807, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3524, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 0.49108147195886226, | |
| "grad_norm": 0.04479382932186127, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3013, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.49236702555037765, | |
| "grad_norm": 0.0524832084774971, | |
| "learning_rate": 2e-05, | |
| "loss": 0.4674, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 0.493652579141893, | |
| "grad_norm": 0.05657699331641197, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3734, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.4949381327334083, | |
| "grad_norm": 0.05035189166665077, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2522, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.4962236863249237, | |
| "grad_norm": 0.045344091951847076, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3121, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.49750923991643903, | |
| "grad_norm": 0.038680486381053925, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2999, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 0.49879479350795436, | |
| "grad_norm": 0.03980954363942146, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2476, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.5000803470994697, | |
| "grad_norm": 0.04812563210725784, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3218, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 0.5013659006909851, | |
| "grad_norm": 0.04132760316133499, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2344, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.5026514542825004, | |
| "grad_norm": 0.03867589682340622, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2172, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 0.5039370078740157, | |
| "grad_norm": 0.05404170975089073, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3489, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.5052225614655311, | |
| "grad_norm": 0.05424851179122925, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3908, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 0.5065081150570464, | |
| "grad_norm": 0.046993743628263474, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3133, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.5077936686485618, | |
| "grad_norm": 0.038952894508838654, | |
| "learning_rate": 2e-05, | |
| "loss": 0.273, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.5090792222400772, | |
| "grad_norm": 0.039642345160245895, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2163, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.5103647758315925, | |
| "grad_norm": 0.05045924335718155, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3934, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 0.5116503294231078, | |
| "grad_norm": 0.03384791314601898, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2427, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.5129358830146231, | |
| "grad_norm": 0.04521351680159569, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3329, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 0.5142214366061385, | |
| "grad_norm": 0.044563427567481995, | |
| "learning_rate": 2e-05, | |
| "loss": 0.327, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.5155069901976539, | |
| "grad_norm": 0.027659917250275612, | |
| "learning_rate": 2e-05, | |
| "loss": 0.1984, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 0.5167925437891692, | |
| "grad_norm": 0.047275714576244354, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2867, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.5180780973806846, | |
| "grad_norm": 0.04775230586528778, | |
| "learning_rate": 2e-05, | |
| "loss": 0.355, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 0.5193636509721999, | |
| "grad_norm": 0.04720161855220795, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2423, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.5206492045637152, | |
| "grad_norm": 0.04180417209863663, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2688, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.5219347581552306, | |
| "grad_norm": 0.05189646780490875, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3736, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.523220311746746, | |
| "grad_norm": 0.04067251831293106, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3039, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 0.5245058653382613, | |
| "grad_norm": 0.05931917205452919, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3374, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.5257914189297767, | |
| "grad_norm": 0.04547608271241188, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2968, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 0.5270769725212919, | |
| "grad_norm": 0.04650389403104782, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2854, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.5283625261128073, | |
| "grad_norm": 0.05240015685558319, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2837, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 0.5296480797043227, | |
| "grad_norm": 0.05040004476904869, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2923, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.530933633295838, | |
| "grad_norm": 0.04871930554509163, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2414, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 0.5322191868873534, | |
| "grad_norm": 0.04192574322223663, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2764, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.5335047404788688, | |
| "grad_norm": 0.05296563729643822, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2723, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.534790294070384, | |
| "grad_norm": 0.03959592431783676, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2204, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.5360758476618994, | |
| "grad_norm": 0.03962741047143936, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2518, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 0.5373614012534148, | |
| "grad_norm": 0.040081944316625595, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2573, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.5386469548449301, | |
| "grad_norm": 0.04713954031467438, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2925, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 0.5399325084364455, | |
| "grad_norm": 0.05657007545232773, | |
| "learning_rate": 2e-05, | |
| "loss": 0.4272, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.5412180620279607, | |
| "grad_norm": 0.05307560786604881, | |
| "learning_rate": 2e-05, | |
| "loss": 0.316, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 0.5425036156194761, | |
| "grad_norm": 0.04280155152082443, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2614, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.5437891692109915, | |
| "grad_norm": 0.03501439467072487, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2318, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 0.5450747228025068, | |
| "grad_norm": 0.05088590830564499, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3533, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.5463602763940222, | |
| "grad_norm": 0.03503134846687317, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2079, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.5476458299855376, | |
| "grad_norm": 0.043812718242406845, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3205, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.5489313835770528, | |
| "grad_norm": 0.05358745902776718, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3713, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 0.5502169371685682, | |
| "grad_norm": 0.042078517377376556, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2371, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.5515024907600835, | |
| "grad_norm": 0.04489399120211601, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2832, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 0.5527880443515989, | |
| "grad_norm": 0.04766567423939705, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2151, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.5540735979431143, | |
| "grad_norm": 0.04447382688522339, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2317, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 0.5553591515346296, | |
| "grad_norm": 0.04144001007080078, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2667, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.556644705126145, | |
| "grad_norm": 0.04112810641527176, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2758, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 0.5579302587176603, | |
| "grad_norm": 0.032402511686086655, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2047, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.5592158123091756, | |
| "grad_norm": 0.04352883994579315, | |
| "learning_rate": 2e-05, | |
| "loss": 0.323, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.560501365900691, | |
| "grad_norm": 0.0496109239757061, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2962, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.5617869194922064, | |
| "grad_norm": 0.04593720659613609, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3705, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 0.5630724730837217, | |
| "grad_norm": 0.040998801589012146, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2219, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.564358026675237, | |
| "grad_norm": 0.04891293868422508, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2539, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 0.5656435802667523, | |
| "grad_norm": 0.04628092423081398, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2521, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.5669291338582677, | |
| "grad_norm": 0.03929414600133896, | |
| "learning_rate": 2e-05, | |
| "loss": 0.1931, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 0.5682146874497831, | |
| "grad_norm": 0.03937762975692749, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2225, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.5695002410412984, | |
| "grad_norm": 0.057498469948768616, | |
| "learning_rate": 2e-05, | |
| "loss": 0.4021, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 0.5707857946328138, | |
| "grad_norm": 0.04665215313434601, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3026, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.5720713482243291, | |
| "grad_norm": 0.04521113634109497, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2592, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.5733569018158444, | |
| "grad_norm": 0.038349051028490067, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2501, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.5746424554073598, | |
| "grad_norm": 0.04515808820724487, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3092, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 0.5759280089988752, | |
| "grad_norm": 0.047012921422719955, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3338, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.5772135625903905, | |
| "grad_norm": 0.0472906231880188, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3139, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 0.5784991161819059, | |
| "grad_norm": 0.04748733341693878, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2414, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.5797846697734211, | |
| "grad_norm": 0.03514058515429497, | |
| "learning_rate": 2e-05, | |
| "loss": 0.1946, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 0.5810702233649365, | |
| "grad_norm": 0.050174906849861145, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3284, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.5823557769564519, | |
| "grad_norm": 0.05283737555146217, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3073, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 0.5836413305479672, | |
| "grad_norm": 0.04498602822422981, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2604, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.5849268841394826, | |
| "grad_norm": 0.042758163064718246, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2221, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.586212437730998, | |
| "grad_norm": 0.041656941175460815, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2491, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.5874979913225132, | |
| "grad_norm": 0.03713398054242134, | |
| "learning_rate": 2e-05, | |
| "loss": 0.1754, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 0.5887835449140286, | |
| "grad_norm": 0.0447508729994297, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2792, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.5900690985055439, | |
| "grad_norm": 0.04686212167143822, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2609, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 0.5913546520970593, | |
| "grad_norm": 0.040732961148023605, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2089, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.5926402056885747, | |
| "grad_norm": 0.04114542156457901, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2315, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 0.59392575928009, | |
| "grad_norm": 0.040324702858924866, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2778, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.5952113128716053, | |
| "grad_norm": 0.0678023248910904, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3029, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 0.5964968664631207, | |
| "grad_norm": 0.04701264947652817, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2829, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.597782420054636, | |
| "grad_norm": 0.03481682017445564, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2345, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.5990679736461514, | |
| "grad_norm": 0.0509064756333828, | |
| "learning_rate": 2e-05, | |
| "loss": 0.303, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 0.6003535272376668, | |
| "grad_norm": 0.052839163690805435, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2798, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 0.601639080829182, | |
| "grad_norm": 0.03605001047253609, | |
| "learning_rate": 2e-05, | |
| "loss": 0.1783, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 0.6029246344206974, | |
| "grad_norm": 0.03640325739979744, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2498, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 0.6042101880122127, | |
| "grad_norm": 0.03874512016773224, | |
| "learning_rate": 2e-05, | |
| "loss": 0.1996, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.6054957416037281, | |
| "grad_norm": 0.03477559611201286, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2121, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 0.6067812951952435, | |
| "grad_norm": 0.04953417927026749, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2821, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 0.6080668487867588, | |
| "grad_norm": 0.04992024600505829, | |
| "learning_rate": 2e-05, | |
| "loss": 0.362, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 0.6093524023782741, | |
| "grad_norm": 0.048429060727357864, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2217, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 0.6106379559697895, | |
| "grad_norm": 0.05344587191939354, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2989, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.6119235095613048, | |
| "grad_norm": 0.04274825379252434, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2424, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 0.6132090631528202, | |
| "grad_norm": 0.04651128128170967, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3412, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 0.6144946167443355, | |
| "grad_norm": 0.05821945145726204, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2726, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 0.6157801703358509, | |
| "grad_norm": 0.0519278421998024, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2927, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 0.6170657239273662, | |
| "grad_norm": 0.03331352025270462, | |
| "learning_rate": 2e-05, | |
| "loss": 0.1688, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.6183512775188815, | |
| "grad_norm": 0.04451346397399902, | |
| "learning_rate": 2e-05, | |
| "loss": 0.213, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 0.6196368311103969, | |
| "grad_norm": 0.04776597023010254, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2826, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 0.6209223847019123, | |
| "grad_norm": 0.0488264262676239, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2886, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 0.6222079382934276, | |
| "grad_norm": 0.04393550381064415, | |
| "learning_rate": 2e-05, | |
| "loss": 0.1949, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 0.623493491884943, | |
| "grad_norm": 0.050872016698122025, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2201, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.6247790454764584, | |
| "grad_norm": 0.06177595257759094, | |
| "learning_rate": 2e-05, | |
| "loss": 0.318, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 0.6260645990679736, | |
| "grad_norm": 0.03842415288090706, | |
| "learning_rate": 2e-05, | |
| "loss": 0.1763, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 0.627350152659489, | |
| "grad_norm": 0.04788699373602867, | |
| "learning_rate": 2e-05, | |
| "loss": 0.247, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 0.6286357062510043, | |
| "grad_norm": 0.05789102241396904, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3338, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 0.6299212598425197, | |
| "grad_norm": 0.04298072308301926, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2321, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.6312068134340351, | |
| "grad_norm": 0.03914102911949158, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2341, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 0.6324923670255503, | |
| "grad_norm": 0.04699448123574257, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2462, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 0.6337779206170657, | |
| "grad_norm": 0.04092938452959061, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2378, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 0.6350634742085811, | |
| "grad_norm": 0.0463721826672554, | |
| "learning_rate": 2e-05, | |
| "loss": 0.225, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 0.6363490278000964, | |
| "grad_norm": 0.0489421971142292, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2341, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.6376345813916118, | |
| "grad_norm": 0.04278067871928215, | |
| "learning_rate": 2e-05, | |
| "loss": 0.234, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 0.6389201349831272, | |
| "grad_norm": 0.04674089327454567, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2755, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 0.6402056885746424, | |
| "grad_norm": 0.056766536086797714, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3656, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 0.6414912421661578, | |
| "grad_norm": 0.04216759279370308, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2931, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 0.6427767957576731, | |
| "grad_norm": 0.04742797464132309, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3386, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.6440623493491885, | |
| "grad_norm": 0.05907592922449112, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3194, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 0.6453479029407039, | |
| "grad_norm": 0.047280214726924896, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2993, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 0.6466334565322192, | |
| "grad_norm": 0.03869684040546417, | |
| "learning_rate": 2e-05, | |
| "loss": 0.158, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 0.6479190101237345, | |
| "grad_norm": 0.04897621273994446, | |
| "learning_rate": 2e-05, | |
| "loss": 0.1948, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 0.6492045637152499, | |
| "grad_norm": 0.055846258997917175, | |
| "learning_rate": 2e-05, | |
| "loss": 0.312, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.6504901173067652, | |
| "grad_norm": 0.04266876354813576, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2377, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 0.6517756708982806, | |
| "grad_norm": 0.050029207020998, | |
| "learning_rate": 2e-05, | |
| "loss": 0.1797, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 0.6530612244897959, | |
| "grad_norm": 0.035082824528217316, | |
| "learning_rate": 2e-05, | |
| "loss": 0.1799, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 0.6543467780813113, | |
| "grad_norm": 0.04430130124092102, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2327, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 0.6556323316728266, | |
| "grad_norm": 0.03854670003056526, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2235, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.6569178852643419, | |
| "grad_norm": 0.04970936104655266, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2818, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 0.6582034388558573, | |
| "grad_norm": 0.04700899496674538, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2417, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 0.6594889924473727, | |
| "grad_norm": 0.04256317391991615, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2703, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 0.660774546038888, | |
| "grad_norm": 0.04744260385632515, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2048, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 0.6620600996304034, | |
| "grad_norm": 0.04310823976993561, | |
| "learning_rate": 2e-05, | |
| "loss": 0.1897, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 0.6633456532219187, | |
| "grad_norm": 0.04300684109330177, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2139, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 0.664631206813434, | |
| "grad_norm": 0.05581510066986084, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2616, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 0.6659167604049494, | |
| "grad_norm": 0.055505942553281784, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2915, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 0.6672023139964647, | |
| "grad_norm": 0.040814101696014404, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2187, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 0.6684878675879801, | |
| "grad_norm": 0.05864616110920906, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3499, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.6697734211794955, | |
| "grad_norm": 0.057373858988285065, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3538, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 0.6710589747710107, | |
| "grad_norm": 0.041141483932733536, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2711, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 0.6723445283625261, | |
| "grad_norm": 0.03994324058294296, | |
| "learning_rate": 2e-05, | |
| "loss": 0.182, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 0.6736300819540415, | |
| "grad_norm": 0.04982011020183563, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2911, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 0.6749156355455568, | |
| "grad_norm": 0.04852016270160675, | |
| "learning_rate": 2e-05, | |
| "loss": 0.254, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.6762011891370722, | |
| "grad_norm": 0.05752996355295181, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2969, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 0.6774867427285874, | |
| "grad_norm": 0.04058138653635979, | |
| "learning_rate": 2e-05, | |
| "loss": 0.1861, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 0.6787722963201028, | |
| "grad_norm": 0.05575535446405411, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3174, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 0.6800578499116182, | |
| "grad_norm": 0.0468176007270813, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2699, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 0.6813434035031335, | |
| "grad_norm": 0.054678115993738174, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3051, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.6826289570946489, | |
| "grad_norm": 0.055189572274684906, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2397, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 0.6839145106861643, | |
| "grad_norm": 0.048087868839502335, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2302, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 0.6852000642776795, | |
| "grad_norm": 0.057727813720703125, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2457, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 0.6864856178691949, | |
| "grad_norm": 0.04846923425793648, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2506, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 0.6877711714607103, | |
| "grad_norm": 0.0410042330622673, | |
| "learning_rate": 2e-05, | |
| "loss": 0.198, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 0.6890567250522256, | |
| "grad_norm": 0.05333555117249489, | |
| "learning_rate": 2e-05, | |
| "loss": 0.283, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 0.690342278643741, | |
| "grad_norm": 0.05376364290714264, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3337, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 0.6916278322352563, | |
| "grad_norm": 0.04879291355609894, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3075, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 0.6929133858267716, | |
| "grad_norm": 0.0375969335436821, | |
| "learning_rate": 2e-05, | |
| "loss": 0.1652, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 0.694198939418287, | |
| "grad_norm": 0.042424045503139496, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2398, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.6954844930098023, | |
| "grad_norm": 0.048496536910533905, | |
| "learning_rate": 2e-05, | |
| "loss": 0.239, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 0.6967700466013177, | |
| "grad_norm": 0.04180686175823212, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2521, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 0.6980556001928331, | |
| "grad_norm": 0.046767883002758026, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2113, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 0.6993411537843484, | |
| "grad_norm": 0.05949412286281586, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3443, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 0.7006267073758637, | |
| "grad_norm": 0.04437008500099182, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2244, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 0.7019122609673791, | |
| "grad_norm": 0.04240270331501961, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2239, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 0.7031978145588944, | |
| "grad_norm": 0.04866647720336914, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2846, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 0.7044833681504098, | |
| "grad_norm": 0.04255237057805061, | |
| "learning_rate": 2e-05, | |
| "loss": 0.1759, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 0.7057689217419251, | |
| "grad_norm": 0.04113907366991043, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2481, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 0.7070544753334405, | |
| "grad_norm": 0.04230246692895889, | |
| "learning_rate": 2e-05, | |
| "loss": 0.1963, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.7083400289249558, | |
| "grad_norm": 0.05263131856918335, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2355, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 0.7096255825164711, | |
| "grad_norm": 0.041025299578905106, | |
| "learning_rate": 2e-05, | |
| "loss": 0.193, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 0.7109111361079865, | |
| "grad_norm": 0.048196010291576385, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2183, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 0.7121966896995019, | |
| "grad_norm": 0.05287821963429451, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2969, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 0.7134822432910172, | |
| "grad_norm": 0.04392276331782341, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2029, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 0.7147677968825326, | |
| "grad_norm": 0.05237026512622833, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2653, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 0.7160533504740478, | |
| "grad_norm": 0.05913091078400612, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2944, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 0.7173389040655632, | |
| "grad_norm": 0.04113471135497093, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2411, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 0.7186244576570786, | |
| "grad_norm": 0.040105462074279785, | |
| "learning_rate": 2e-05, | |
| "loss": 0.1857, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 0.7199100112485939, | |
| "grad_norm": 0.058607831597328186, | |
| "learning_rate": 2e-05, | |
| "loss": 0.1984, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.7211955648401093, | |
| "grad_norm": 0.043256357312202454, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2584, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 0.7224811184316247, | |
| "grad_norm": 0.05908385291695595, | |
| "learning_rate": 2e-05, | |
| "loss": 0.33, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 0.7237666720231399, | |
| "grad_norm": 0.050697483122348785, | |
| "learning_rate": 2e-05, | |
| "loss": 0.242, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 0.7250522256146553, | |
| "grad_norm": 0.05611984431743622, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3334, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 0.7263377792061707, | |
| "grad_norm": 0.05749541521072388, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2454, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 0.727623332797686, | |
| "grad_norm": 0.05453288555145264, | |
| "learning_rate": 2e-05, | |
| "loss": 0.249, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 0.7289088863892014, | |
| "grad_norm": 0.061655569821596146, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2954, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 0.7301944399807166, | |
| "grad_norm": 0.051404744386672974, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2356, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 0.731479993572232, | |
| "grad_norm": 0.04265725240111351, | |
| "learning_rate": 2e-05, | |
| "loss": 0.1842, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 0.7327655471637474, | |
| "grad_norm": 0.06363217532634735, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3187, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.7340511007552627, | |
| "grad_norm": 0.04742373526096344, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2286, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 0.7353366543467781, | |
| "grad_norm": 0.05723915994167328, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3183, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 0.7366222079382935, | |
| "grad_norm": 0.04636276140809059, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2172, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 0.7379077615298087, | |
| "grad_norm": 0.041882552206516266, | |
| "learning_rate": 2e-05, | |
| "loss": 0.195, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 0.7391933151213241, | |
| "grad_norm": 0.05022399127483368, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2564, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.7404788687128395, | |
| "grad_norm": 0.058215439319610596, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3047, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 0.7417644223043548, | |
| "grad_norm": 0.04993325099349022, | |
| "learning_rate": 2e-05, | |
| "loss": 0.1955, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 0.7430499758958702, | |
| "grad_norm": 0.05288231745362282, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3005, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 0.7443355294873855, | |
| "grad_norm": 0.055686481297016144, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3304, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 0.7456210830789008, | |
| "grad_norm": 0.06084279343485832, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3377, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.7469066366704162, | |
| "grad_norm": 0.041104961186647415, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2019, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 0.7481921902619315, | |
| "grad_norm": 0.04409842938184738, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2383, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 0.7494777438534469, | |
| "grad_norm": 0.050962381064891815, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2439, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 0.7507632974449623, | |
| "grad_norm": 0.05231870710849762, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2337, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 0.7520488510364776, | |
| "grad_norm": 0.04085131362080574, | |
| "learning_rate": 2e-05, | |
| "loss": 0.1451, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 0.753334404627993, | |
| "grad_norm": 0.04120944067835808, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2029, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 0.7546199582195082, | |
| "grad_norm": 0.0363801047205925, | |
| "learning_rate": 2e-05, | |
| "loss": 0.1393, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 0.7559055118110236, | |
| "grad_norm": 0.04919865354895592, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2308, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 0.757191065402539, | |
| "grad_norm": 0.0516657792031765, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3006, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 0.7584766189940543, | |
| "grad_norm": 0.07350458204746246, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3796, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.7597621725855697, | |
| "grad_norm": 0.05353572219610214, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2548, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 0.761047726177085, | |
| "grad_norm": 0.04492725431919098, | |
| "learning_rate": 2e-05, | |
| "loss": 0.199, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 0.7623332797686003, | |
| "grad_norm": 0.04892539232969284, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2108, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 0.7636188333601157, | |
| "grad_norm": 0.03860924020409584, | |
| "learning_rate": 2e-05, | |
| "loss": 0.1896, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 0.7649043869516311, | |
| "grad_norm": 0.052807312458753586, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2709, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 0.7661899405431464, | |
| "grad_norm": 0.04871145263314247, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2779, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 0.7674754941346618, | |
| "grad_norm": 0.04021324962377548, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2136, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 0.768761047726177, | |
| "grad_norm": 0.050265613943338394, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2601, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 0.7700466013176924, | |
| "grad_norm": 0.03576774150133133, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2114, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 0.7713321549092078, | |
| "grad_norm": 0.055398743599653244, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2701, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.7726177085007231, | |
| "grad_norm": 0.06506812572479248, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3518, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 0.7739032620922385, | |
| "grad_norm": 0.037148088216781616, | |
| "learning_rate": 2e-05, | |
| "loss": 0.1438, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 0.7751888156837539, | |
| "grad_norm": 0.046173613518476486, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2294, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 0.7764743692752691, | |
| "grad_norm": 0.06617863476276398, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2888, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 0.7777599228667845, | |
| "grad_norm": 0.051207173615694046, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2624, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 0.7790454764582998, | |
| "grad_norm": 0.041766516864299774, | |
| "learning_rate": 2e-05, | |
| "loss": 0.1881, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 0.7803310300498152, | |
| "grad_norm": 0.05160610005259514, | |
| "learning_rate": 2e-05, | |
| "loss": 0.258, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 0.7816165836413306, | |
| "grad_norm": 0.04584109038114548, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2087, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 0.7829021372328459, | |
| "grad_norm": 0.04200456291437149, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2036, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 0.7841876908243612, | |
| "grad_norm": 0.039162181317806244, | |
| "learning_rate": 2e-05, | |
| "loss": 0.1833, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.7854732444158766, | |
| "grad_norm": 0.041861940175294876, | |
| "learning_rate": 2e-05, | |
| "loss": 0.1623, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 0.7867587980073919, | |
| "grad_norm": 0.05622352659702301, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3556, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 0.7880443515989073, | |
| "grad_norm": 0.048621952533721924, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2211, | |
| "step": 613 | |
| }, | |
| { | |
| "epoch": 0.7893299051904227, | |
| "grad_norm": 0.0437590628862381, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2015, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 0.790615458781938, | |
| "grad_norm": 0.05675414949655533, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2416, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 0.7919010123734533, | |
| "grad_norm": 0.03869640827178955, | |
| "learning_rate": 2e-05, | |
| "loss": 0.1655, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 0.7931865659649686, | |
| "grad_norm": 0.04821722209453583, | |
| "learning_rate": 2e-05, | |
| "loss": 0.1902, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 0.794472119556484, | |
| "grad_norm": 0.04423803463578224, | |
| "learning_rate": 2e-05, | |
| "loss": 0.157, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 0.7957576731479994, | |
| "grad_norm": 0.04364867880940437, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2406, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 0.7970432267395147, | |
| "grad_norm": 0.059711892157793045, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2981, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.79832878033103, | |
| "grad_norm": 0.046063173562288284, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2184, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 0.7996143339225454, | |
| "grad_norm": 0.06073896959424019, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2351, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 0.8008998875140607, | |
| "grad_norm": 0.039248064160346985, | |
| "learning_rate": 2e-05, | |
| "loss": 0.1888, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 0.8021854411055761, | |
| "grad_norm": 0.05402129143476486, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3368, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 0.8034709946970915, | |
| "grad_norm": 0.04230786859989166, | |
| "learning_rate": 2e-05, | |
| "loss": 0.1748, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 0.8047565482886068, | |
| "grad_norm": 0.06045274809002876, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3958, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 0.8060421018801222, | |
| "grad_norm": 0.04717743769288063, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2704, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 0.8073276554716374, | |
| "grad_norm": 0.04878292232751846, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2412, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 0.8086132090631528, | |
| "grad_norm": 0.038947124034166336, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2169, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 0.8098987626546682, | |
| "grad_norm": 0.0614759586751461, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3013, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.8111843162461835, | |
| "grad_norm": 0.06246621906757355, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2947, | |
| "step": 631 | |
| }, | |
| { | |
| "epoch": 0.8124698698376989, | |
| "grad_norm": 0.06976212561130524, | |
| "learning_rate": 2e-05, | |
| "loss": 0.297, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 0.8137554234292143, | |
| "grad_norm": 0.03317941352725029, | |
| "learning_rate": 2e-05, | |
| "loss": 0.1375, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 0.8150409770207295, | |
| "grad_norm": 0.06765579432249069, | |
| "learning_rate": 2e-05, | |
| "loss": 0.258, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 0.8163265306122449, | |
| "grad_norm": 0.06797792762517929, | |
| "learning_rate": 2e-05, | |
| "loss": 0.256, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 0.8176120842037602, | |
| "grad_norm": 0.059785496443510056, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3343, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 0.8188976377952756, | |
| "grad_norm": 0.059780728071928024, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3634, | |
| "step": 637 | |
| }, | |
| { | |
| "epoch": 0.820183191386791, | |
| "grad_norm": 0.04111599549651146, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2011, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 0.8214687449783062, | |
| "grad_norm": 0.04656028002500534, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2214, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 0.8227542985698216, | |
| "grad_norm": 0.054362326860427856, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2928, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.824039852161337, | |
| "grad_norm": 0.04594152048230171, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2285, | |
| "step": 641 | |
| }, | |
| { | |
| "epoch": 0.8253254057528523, | |
| "grad_norm": 0.056715745478868484, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2531, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 0.8266109593443677, | |
| "grad_norm": 0.049057237803936005, | |
| "learning_rate": 2e-05, | |
| "loss": 0.1749, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 0.8278965129358831, | |
| "grad_norm": 0.05435045436024666, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2796, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 0.8291820665273983, | |
| "grad_norm": 0.049284275621175766, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2381, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 0.8304676201189137, | |
| "grad_norm": 0.044050633907318115, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2804, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 0.831753173710429, | |
| "grad_norm": 0.054185982793569565, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2617, | |
| "step": 647 | |
| }, | |
| { | |
| "epoch": 0.8330387273019444, | |
| "grad_norm": 0.0534062534570694, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2502, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 0.8343242808934598, | |
| "grad_norm": 0.06242300197482109, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2662, | |
| "step": 649 | |
| }, | |
| { | |
| "epoch": 0.8356098344849751, | |
| "grad_norm": 0.0385594442486763, | |
| "learning_rate": 2e-05, | |
| "loss": 0.1897, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.8368953880764904, | |
| "grad_norm": 0.065641388297081, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3179, | |
| "step": 651 | |
| }, | |
| { | |
| "epoch": 0.8381809416680058, | |
| "grad_norm": 0.054985061287879944, | |
| "learning_rate": 2e-05, | |
| "loss": 0.222, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 0.8394664952595211, | |
| "grad_norm": 0.05766449496150017, | |
| "learning_rate": 2e-05, | |
| "loss": 0.289, | |
| "step": 653 | |
| }, | |
| { | |
| "epoch": 0.8407520488510365, | |
| "grad_norm": 0.04635515809059143, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2464, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 0.8420376024425518, | |
| "grad_norm": 0.0583229660987854, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2436, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 0.8433231560340672, | |
| "grad_norm": 0.04983345419168472, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2534, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 0.8446087096255825, | |
| "grad_norm": 0.04292474314570427, | |
| "learning_rate": 2e-05, | |
| "loss": 0.1772, | |
| "step": 657 | |
| }, | |
| { | |
| "epoch": 0.8458942632170978, | |
| "grad_norm": 0.05735989660024643, | |
| "learning_rate": 2e-05, | |
| "loss": 0.267, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 0.8471798168086132, | |
| "grad_norm": 0.055415477603673935, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2651, | |
| "step": 659 | |
| }, | |
| { | |
| "epoch": 0.8484653704001286, | |
| "grad_norm": 0.052020199596881866, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2177, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.8497509239916439, | |
| "grad_norm": 0.05934329703450203, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2665, | |
| "step": 661 | |
| }, | |
| { | |
| "epoch": 0.8510364775831593, | |
| "grad_norm": 0.06611707806587219, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3774, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 0.8523220311746746, | |
| "grad_norm": 0.05337178707122803, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2699, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 0.8536075847661899, | |
| "grad_norm": 0.05552757531404495, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2204, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 0.8548931383577053, | |
| "grad_norm": 0.051326069980859756, | |
| "learning_rate": 2e-05, | |
| "loss": 0.1791, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 0.8561786919492206, | |
| "grad_norm": 0.04780028760433197, | |
| "learning_rate": 2e-05, | |
| "loss": 0.1959, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 0.857464245540736, | |
| "grad_norm": 0.06344909965991974, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2809, | |
| "step": 667 | |
| }, | |
| { | |
| "epoch": 0.8587497991322514, | |
| "grad_norm": 0.0526767373085022, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2547, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 0.8600353527237666, | |
| "grad_norm": 0.04369194433093071, | |
| "learning_rate": 2e-05, | |
| "loss": 0.233, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 0.861320906315282, | |
| "grad_norm": 0.05023709312081337, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2576, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.8626064599067974, | |
| "grad_norm": 0.06402754783630371, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2579, | |
| "step": 671 | |
| }, | |
| { | |
| "epoch": 0.8638920134983127, | |
| "grad_norm": 0.06747744977474213, | |
| "learning_rate": 2e-05, | |
| "loss": 0.393, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 0.8651775670898281, | |
| "grad_norm": 0.06799997389316559, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3114, | |
| "step": 673 | |
| }, | |
| { | |
| "epoch": 0.8664631206813435, | |
| "grad_norm": 0.044738415628671646, | |
| "learning_rate": 2e-05, | |
| "loss": 0.222, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 0.8677486742728587, | |
| "grad_norm": 0.05913526564836502, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2701, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 0.8690342278643741, | |
| "grad_norm": 0.052639495581388474, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2279, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 0.8703197814558894, | |
| "grad_norm": 0.0436641164124012, | |
| "learning_rate": 2e-05, | |
| "loss": 0.1722, | |
| "step": 677 | |
| }, | |
| { | |
| "epoch": 0.8716053350474048, | |
| "grad_norm": 0.06275106966495514, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3289, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 0.8728908886389202, | |
| "grad_norm": 0.034002162516117096, | |
| "learning_rate": 2e-05, | |
| "loss": 0.1262, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 0.8741764422304354, | |
| "grad_norm": 0.04524555802345276, | |
| "learning_rate": 2e-05, | |
| "loss": 0.1765, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.8754619958219508, | |
| "grad_norm": 0.04776989668607712, | |
| "learning_rate": 2e-05, | |
| "loss": 0.242, | |
| "step": 681 | |
| }, | |
| { | |
| "epoch": 0.8767475494134662, | |
| "grad_norm": 0.060143712908029556, | |
| "learning_rate": 2e-05, | |
| "loss": 0.24, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 0.8780331030049815, | |
| "grad_norm": 0.06363454461097717, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3104, | |
| "step": 683 | |
| }, | |
| { | |
| "epoch": 0.8793186565964969, | |
| "grad_norm": 0.05736486613750458, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3299, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 0.8806042101880122, | |
| "grad_norm": 0.048391181975603104, | |
| "learning_rate": 2e-05, | |
| "loss": 0.1937, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 0.8818897637795275, | |
| "grad_norm": 0.047165125608444214, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2608, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 0.8831753173710429, | |
| "grad_norm": 0.061681345105171204, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2948, | |
| "step": 687 | |
| }, | |
| { | |
| "epoch": 0.8844608709625582, | |
| "grad_norm": 0.060136910527944565, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2272, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 0.8857464245540736, | |
| "grad_norm": 0.047498807311058044, | |
| "learning_rate": 2e-05, | |
| "loss": 0.1813, | |
| "step": 689 | |
| }, | |
| { | |
| "epoch": 0.887031978145589, | |
| "grad_norm": 0.06447866559028625, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2808, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.8883175317371043, | |
| "grad_norm": 0.05992686748504639, | |
| "learning_rate": 2e-05, | |
| "loss": 0.262, | |
| "step": 691 | |
| }, | |
| { | |
| "epoch": 0.8896030853286196, | |
| "grad_norm": 0.048196423798799515, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2238, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 0.890888638920135, | |
| "grad_norm": 0.06860709935426712, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2679, | |
| "step": 693 | |
| }, | |
| { | |
| "epoch": 0.8921741925116503, | |
| "grad_norm": 0.05085690692067146, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2948, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 0.8934597461031657, | |
| "grad_norm": 0.06869999319314957, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2961, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 0.894745299694681, | |
| "grad_norm": 0.04691535234451294, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2019, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 0.8960308532861964, | |
| "grad_norm": 0.04785510525107384, | |
| "learning_rate": 2e-05, | |
| "loss": 0.147, | |
| "step": 697 | |
| }, | |
| { | |
| "epoch": 0.8973164068777117, | |
| "grad_norm": 0.06156083196401596, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2215, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 0.898601960469227, | |
| "grad_norm": 0.051647745072841644, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2252, | |
| "step": 699 | |
| }, | |
| { | |
| "epoch": 0.8998875140607424, | |
| "grad_norm": 0.04751814156770706, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2482, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.9011730676522578, | |
| "grad_norm": 0.05452054366469383, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2138, | |
| "step": 701 | |
| }, | |
| { | |
| "epoch": 0.9024586212437731, | |
| "grad_norm": 0.045277033001184464, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2148, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 0.9037441748352885, | |
| "grad_norm": 0.045462466776371, | |
| "learning_rate": 2e-05, | |
| "loss": 0.1711, | |
| "step": 703 | |
| }, | |
| { | |
| "epoch": 0.9050297284268038, | |
| "grad_norm": 0.06722573935985565, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3205, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 0.9063152820183191, | |
| "grad_norm": 0.05163208395242691, | |
| "learning_rate": 2e-05, | |
| "loss": 0.247, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 0.9076008356098345, | |
| "grad_norm": 0.052614837884902954, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2002, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 0.9088863892013498, | |
| "grad_norm": 0.03826769068837166, | |
| "learning_rate": 2e-05, | |
| "loss": 0.1744, | |
| "step": 707 | |
| }, | |
| { | |
| "epoch": 0.9101719427928652, | |
| "grad_norm": 0.04780410975217819, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2524, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 0.9114574963843806, | |
| "grad_norm": 0.03547963872551918, | |
| "learning_rate": 2e-05, | |
| "loss": 0.1674, | |
| "step": 709 | |
| }, | |
| { | |
| "epoch": 0.9127430499758958, | |
| "grad_norm": 0.0573282465338707, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2749, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.9140286035674112, | |
| "grad_norm": 0.0570538304746151, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2412, | |
| "step": 711 | |
| }, | |
| { | |
| "epoch": 0.9153141571589266, | |
| "grad_norm": 0.054683949798345566, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2537, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 0.9165997107504419, | |
| "grad_norm": 0.05413772165775299, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2314, | |
| "step": 713 | |
| }, | |
| { | |
| "epoch": 0.9178852643419573, | |
| "grad_norm": 0.05124877020716667, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2645, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 0.9191708179334726, | |
| "grad_norm": 0.06577921658754349, | |
| "learning_rate": 2e-05, | |
| "loss": 0.314, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 0.9204563715249879, | |
| "grad_norm": 0.05663186311721802, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2422, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 0.9217419251165033, | |
| "grad_norm": 0.05851929262280464, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2845, | |
| "step": 717 | |
| }, | |
| { | |
| "epoch": 0.9230274787080186, | |
| "grad_norm": 0.06582541763782501, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2487, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 0.924313032299534, | |
| "grad_norm": 0.0434844084084034, | |
| "learning_rate": 2e-05, | |
| "loss": 0.191, | |
| "step": 719 | |
| }, | |
| { | |
| "epoch": 0.9255985858910494, | |
| "grad_norm": 0.056996386498212814, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2733, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.9268841394825647, | |
| "grad_norm": 0.04399803280830383, | |
| "learning_rate": 2e-05, | |
| "loss": 0.1991, | |
| "step": 721 | |
| }, | |
| { | |
| "epoch": 0.92816969307408, | |
| "grad_norm": 0.047656819224357605, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2274, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 0.9294552466655954, | |
| "grad_norm": 0.0753135085105896, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3748, | |
| "step": 723 | |
| }, | |
| { | |
| "epoch": 0.9307408002571107, | |
| "grad_norm": 0.07544931024312973, | |
| "learning_rate": 2e-05, | |
| "loss": 0.282, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 0.9320263538486261, | |
| "grad_norm": 0.05577397346496582, | |
| "learning_rate": 2e-05, | |
| "loss": 0.273, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 0.9333119074401414, | |
| "grad_norm": 0.039960604161024094, | |
| "learning_rate": 2e-05, | |
| "loss": 0.1423, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 0.9345974610316568, | |
| "grad_norm": 0.0625922679901123, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2504, | |
| "step": 727 | |
| }, | |
| { | |
| "epoch": 0.9358830146231721, | |
| "grad_norm": 0.060125015676021576, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2061, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 0.9371685682146874, | |
| "grad_norm": 0.06697895377874374, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2672, | |
| "step": 729 | |
| }, | |
| { | |
| "epoch": 0.9384541218062028, | |
| "grad_norm": 0.09079831093549728, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3944, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.9397396753977182, | |
| "grad_norm": 0.05246804282069206, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2153, | |
| "step": 731 | |
| }, | |
| { | |
| "epoch": 0.9410252289892335, | |
| "grad_norm": 0.03938793018460274, | |
| "learning_rate": 2e-05, | |
| "loss": 0.1496, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 0.9423107825807489, | |
| "grad_norm": 0.05081872642040253, | |
| "learning_rate": 2e-05, | |
| "loss": 0.1939, | |
| "step": 733 | |
| }, | |
| { | |
| "epoch": 0.9435963361722641, | |
| "grad_norm": 0.055075064301490784, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2314, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 0.9448818897637795, | |
| "grad_norm": 0.057048946619033813, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2258, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 0.9461674433552949, | |
| "grad_norm": 0.0564640611410141, | |
| "learning_rate": 2e-05, | |
| "loss": 0.221, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 0.9474529969468102, | |
| "grad_norm": 0.06246118247509003, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2655, | |
| "step": 737 | |
| }, | |
| { | |
| "epoch": 0.9487385505383256, | |
| "grad_norm": 0.06543996930122375, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3487, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 0.950024104129841, | |
| "grad_norm": 0.05123418942093849, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2593, | |
| "step": 739 | |
| }, | |
| { | |
| "epoch": 0.9513096577213562, | |
| "grad_norm": 0.04761409014463425, | |
| "learning_rate": 2e-05, | |
| "loss": 0.1717, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.9525952113128716, | |
| "grad_norm": 0.05747079476714134, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2239, | |
| "step": 741 | |
| }, | |
| { | |
| "epoch": 0.953880764904387, | |
| "grad_norm": 0.04854227229952812, | |
| "learning_rate": 2e-05, | |
| "loss": 0.1742, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 0.9551663184959023, | |
| "grad_norm": 0.05784037709236145, | |
| "learning_rate": 2e-05, | |
| "loss": 0.203, | |
| "step": 743 | |
| }, | |
| { | |
| "epoch": 0.9564518720874177, | |
| "grad_norm": 0.05370228737592697, | |
| "learning_rate": 2e-05, | |
| "loss": 0.255, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 0.9577374256789329, | |
| "grad_norm": 0.04535800218582153, | |
| "learning_rate": 2e-05, | |
| "loss": 0.1951, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 0.9590229792704483, | |
| "grad_norm": 0.044412512332201004, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2087, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 0.9603085328619637, | |
| "grad_norm": 0.05077359825372696, | |
| "learning_rate": 2e-05, | |
| "loss": 0.19, | |
| "step": 747 | |
| }, | |
| { | |
| "epoch": 0.961594086453479, | |
| "grad_norm": 0.056578539311885834, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2784, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 0.9628796400449944, | |
| "grad_norm": 0.04252656549215317, | |
| "learning_rate": 2e-05, | |
| "loss": 0.239, | |
| "step": 749 | |
| }, | |
| { | |
| "epoch": 0.9641651936365098, | |
| "grad_norm": 0.04754233360290527, | |
| "learning_rate": 2e-05, | |
| "loss": 0.1871, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.965450747228025, | |
| "grad_norm": 0.04948977380990982, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2095, | |
| "step": 751 | |
| }, | |
| { | |
| "epoch": 0.9667363008195404, | |
| "grad_norm": 0.056569986045360565, | |
| "learning_rate": 2e-05, | |
| "loss": 0.1627, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 0.9680218544110558, | |
| "grad_norm": 0.058012060821056366, | |
| "learning_rate": 2e-05, | |
| "loss": 0.277, | |
| "step": 753 | |
| }, | |
| { | |
| "epoch": 0.9693074080025711, | |
| "grad_norm": 0.06445303559303284, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3453, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 0.9705929615940865, | |
| "grad_norm": 0.04822942987084389, | |
| "learning_rate": 2e-05, | |
| "loss": 0.1958, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 0.9718785151856018, | |
| "grad_norm": 0.04951447993516922, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2342, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 0.9731640687771171, | |
| "grad_norm": 0.04779404401779175, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2277, | |
| "step": 757 | |
| }, | |
| { | |
| "epoch": 0.9744496223686325, | |
| "grad_norm": 0.047998420894145966, | |
| "learning_rate": 2e-05, | |
| "loss": 0.1817, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 0.9757351759601478, | |
| "grad_norm": 0.050718434154987335, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2289, | |
| "step": 759 | |
| }, | |
| { | |
| "epoch": 0.9770207295516632, | |
| "grad_norm": 0.05427386984229088, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2597, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.9783062831431786, | |
| "grad_norm": 0.06047537922859192, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2597, | |
| "step": 761 | |
| }, | |
| { | |
| "epoch": 0.9795918367346939, | |
| "grad_norm": 0.048412878066301346, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2386, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 0.9808773903262092, | |
| "grad_norm": 0.04905233159661293, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2239, | |
| "step": 763 | |
| }, | |
| { | |
| "epoch": 0.9821629439177245, | |
| "grad_norm": 0.052379023283720016, | |
| "learning_rate": 2e-05, | |
| "loss": 0.263, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 0.9834484975092399, | |
| "grad_norm": 0.0489642396569252, | |
| "learning_rate": 2e-05, | |
| "loss": 0.225, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 0.9847340511007553, | |
| "grad_norm": 0.050984520465135574, | |
| "learning_rate": 2e-05, | |
| "loss": 0.219, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 0.9860196046922706, | |
| "grad_norm": 0.05487053468823433, | |
| "learning_rate": 2e-05, | |
| "loss": 0.1788, | |
| "step": 767 | |
| }, | |
| { | |
| "epoch": 0.987305158283786, | |
| "grad_norm": 0.06488880515098572, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2994, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 0.9885907118753013, | |
| "grad_norm": 0.057233408093452454, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3028, | |
| "step": 769 | |
| }, | |
| { | |
| "epoch": 0.9898762654668166, | |
| "grad_norm": 0.03885122016072273, | |
| "learning_rate": 2e-05, | |
| "loss": 0.1704, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.991161819058332, | |
| "grad_norm": 0.04395405203104019, | |
| "learning_rate": 2e-05, | |
| "loss": 0.168, | |
| "step": 771 | |
| }, | |
| { | |
| "epoch": 0.9924473726498474, | |
| "grad_norm": 0.07156252861022949, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3431, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 0.9937329262413627, | |
| "grad_norm": 0.05737178027629852, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2595, | |
| "step": 773 | |
| }, | |
| { | |
| "epoch": 0.9950184798328781, | |
| "grad_norm": 0.0596122108399868, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2177, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 0.9963040334243933, | |
| "grad_norm": 0.0480956956744194, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2008, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 0.9975895870159087, | |
| "grad_norm": 0.045857105404138565, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2093, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 0.9988751406074241, | |
| "grad_norm": 0.05208531767129898, | |
| "learning_rate": 2e-05, | |
| "loss": 0.1512, | |
| "step": 777 | |
| }, | |
| { | |
| "epoch": 0.9988751406074241, | |
| "step": 777, | |
| "total_flos": 525522702336000.0, | |
| "train_loss": 0.3197911096739186, | |
| "train_runtime": 4432.7229, | |
| "train_samples_per_second": 5.616, | |
| "train_steps_per_second": 0.175 | |
| } | |
| ], | |
| "logging_steps": 1.0, | |
| "max_steps": 777, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 100.0, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 525522702336000.0, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |