{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.903111111111111,
  "eval_steps": 500,
  "global_step": 1100,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.035555555555555556,
      "grad_norm": 1.6136552095413208,
      "learning_rate": 1.4084507042253522e-07,
      "loss": 1.4283,
      "step": 10
    },
    {
      "epoch": 0.07111111111111111,
      "grad_norm": 2.3250255584716797,
      "learning_rate": 2.8169014084507043e-07,
      "loss": 1.4176,
      "step": 20
    },
    {
      "epoch": 0.10666666666666667,
      "grad_norm": 2.205648422241211,
      "learning_rate": 4.225352112676056e-07,
      "loss": 1.3904,
      "step": 30
    },
    {
      "epoch": 0.14222222222222222,
      "grad_norm": 1.679602861404419,
      "learning_rate": 5.633802816901409e-07,
      "loss": 1.3256,
      "step": 40
    },
    {
      "epoch": 0.17777777777777778,
      "grad_norm": 1.6885226964950562,
      "learning_rate": 7.04225352112676e-07,
      "loss": 1.2877,
      "step": 50
    },
    {
      "epoch": 0.21333333333333335,
      "grad_norm": 1.3719532489776611,
      "learning_rate": 8.450704225352112e-07,
      "loss": 1.2335,
      "step": 60
    },
    {
      "epoch": 0.24888888888888888,
      "grad_norm": 1.6127221584320068,
      "learning_rate": 9.859154929577465e-07,
      "loss": 1.1898,
      "step": 70
    },
    {
      "epoch": 0.28444444444444444,
      "grad_norm": 1.3292348384857178,
      "learning_rate": 9.998876955784181e-07,
      "loss": 1.1213,
      "step": 80
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1058685779571533,
      "learning_rate": 9.994995475316987e-07,
      "loss": 1.104,
      "step": 90
    },
    {
      "epoch": 0.35555555555555557,
      "grad_norm": 1.0595113039016724,
      "learning_rate": 9.988343845952696e-07,
      "loss": 1.059,
      "step": 100
    },
    {
      "epoch": 0.39111111111111113,
      "grad_norm": 0.9761242270469666,
      "learning_rate": 9.978925756584284e-07,
      "loss": 0.9813,
      "step": 110
    },
    {
      "epoch": 0.4266666666666667,
      "grad_norm": 0.8893954157829285,
      "learning_rate": 9.966746430341582e-07,
      "loss": 0.9635,
      "step": 120
    },
    {
      "epoch": 0.4622222222222222,
      "grad_norm": 0.8302690982818604,
      "learning_rate": 9.951812621694608e-07,
      "loss": 0.9373,
      "step": 130
    },
    {
      "epoch": 0.49777777777777776,
      "grad_norm": 0.74117112159729,
      "learning_rate": 9.93413261270763e-07,
      "loss": 0.9394,
      "step": 140
    },
    {
      "epoch": 0.5333333333333333,
      "grad_norm": 0.910311758518219,
      "learning_rate": 9.913716208446065e-07,
      "loss": 0.9476,
      "step": 150
    },
    {
      "epoch": 0.5688888888888889,
      "grad_norm": 0.9787248373031616,
      "learning_rate": 9.890574731538739e-07,
      "loss": 0.9403,
      "step": 160
    },
    {
      "epoch": 0.6044444444444445,
      "grad_norm": 0.6852824091911316,
      "learning_rate": 9.864721015898523e-07,
      "loss": 0.9306,
      "step": 170
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.9083530306816101,
      "learning_rate": 9.836169399604845e-07,
      "loss": 0.9356,
      "step": 180
    },
    {
      "epoch": 0.6755555555555556,
      "grad_norm": 0.6284005641937256,
      "learning_rate": 9.80493571695201e-07,
      "loss": 0.9154,
      "step": 190
    },
    {
      "epoch": 0.7111111111111111,
      "grad_norm": 0.8122096061706543,
      "learning_rate": 9.771037289667726e-07,
      "loss": 0.8989,
      "step": 200
    },
    {
      "epoch": 0.7466666666666667,
      "grad_norm": 0.6801354885101318,
      "learning_rate": 9.734492917306754e-07,
      "loss": 0.9159,
      "step": 210
    },
    {
      "epoch": 0.7822222222222223,
      "grad_norm": 1.5338674783706665,
      "learning_rate": 9.695322866824947e-07,
      "loss": 0.8969,
      "step": 220
    },
    {
      "epoch": 0.8177777777777778,
      "grad_norm": 0.9366681575775146,
      "learning_rate": 9.653548861339508e-07,
      "loss": 0.9099,
      "step": 230
    },
    {
      "epoch": 0.8533333333333334,
      "grad_norm": 0.8953334093093872,
      "learning_rate": 9.60919406808168e-07,
      "loss": 0.8797,
      "step": 240
    },
    {
      "epoch": 0.8888888888888888,
      "grad_norm": 0.7514542937278748,
      "learning_rate": 9.562283085548543e-07,
      "loss": 0.8666,
      "step": 250
    },
    {
      "epoch": 0.9244444444444444,
      "grad_norm": 0.7203475832939148,
      "learning_rate": 9.512841929861068e-07,
      "loss": 0.893,
      "step": 260
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.9745852947235107,
      "learning_rate": 9.460898020335964e-07,
      "loss": 0.8883,
      "step": 270
    },
    {
      "epoch": 0.9955555555555555,
      "grad_norm": 0.9440745711326599,
      "learning_rate": 9.40648016427934e-07,
      "loss": 0.869,
      "step": 280
    },
    {
      "epoch": 1.0284444444444445,
      "grad_norm": 1.0532046556472778,
      "learning_rate": 9.349618541010616e-07,
      "loss": 0.7853,
      "step": 290
    },
    {
      "epoch": 1.064,
      "grad_norm": 0.7366812825202942,
      "learning_rate": 9.290344685125519e-07,
      "loss": 0.8485,
      "step": 300
    },
    {
      "epoch": 1.0995555555555556,
      "grad_norm": 0.6317222118377686,
      "learning_rate": 9.228691469007486e-07,
      "loss": 0.8323,
      "step": 310
    },
    {
      "epoch": 1.1351111111111112,
      "grad_norm": 0.4928416907787323,
      "learning_rate": 9.16469308459712e-07,
      "loss": 0.881,
      "step": 320
    },
    {
      "epoch": 1.1706666666666667,
      "grad_norm": 0.8622790575027466,
      "learning_rate": 9.098385024429874e-07,
      "loss": 0.8618,
      "step": 330
    },
    {
      "epoch": 1.2062222222222223,
      "grad_norm": 0.9656073451042175,
      "learning_rate": 9.029804061952424e-07,
      "loss": 0.8504,
      "step": 340
    },
    {
      "epoch": 1.2417777777777779,
      "grad_norm": 0.8012099862098694,
      "learning_rate": 8.958988231128663e-07,
      "loss": 0.8289,
      "step": 350
    },
    {
      "epoch": 1.2773333333333334,
      "grad_norm": 0.831724226474762,
      "learning_rate": 8.885976805346651e-07,
      "loss": 0.8313,
      "step": 360
    },
    {
      "epoch": 1.3128888888888888,
      "grad_norm": 0.9381484389305115,
      "learning_rate": 8.810810275638182e-07,
      "loss": 0.8222,
      "step": 370
    },
    {
      "epoch": 1.3484444444444446,
      "grad_norm": 0.7074716687202454,
      "learning_rate": 8.733530328223075e-07,
      "loss": 0.815,
      "step": 380
    },
    {
      "epoch": 1.384,
      "grad_norm": 0.6802889704704285,
      "learning_rate": 8.654179821390621e-07,
      "loss": 0.8485,
      "step": 390
    },
    {
      "epoch": 1.4195555555555557,
      "grad_norm": 0.6159129738807678,
      "learning_rate": 8.572802761731031e-07,
      "loss": 0.8396,
      "step": 400
    },
    {
      "epoch": 1.455111111111111,
      "grad_norm": 1.0787162780761719,
      "learning_rate": 8.489444279730045e-07,
      "loss": 0.8342,
      "step": 410
    },
    {
      "epoch": 1.4906666666666666,
      "grad_norm": 0.850229024887085,
      "learning_rate": 8.404150604740248e-07,
      "loss": 0.8385,
      "step": 420
    },
    {
      "epoch": 1.5262222222222221,
      "grad_norm": 0.9370916485786438,
      "learning_rate": 8.316969039342963e-07,
      "loss": 0.7899,
      "step": 430
    },
    {
      "epoch": 1.561777777777778,
      "grad_norm": 0.7209655046463013,
      "learning_rate": 8.22794793311497e-07,
      "loss": 0.8046,
      "step": 440
    },
    {
      "epoch": 1.5973333333333333,
      "grad_norm": 0.8257189989089966,
      "learning_rate": 8.137136655814549e-07,
      "loss": 0.8178,
      "step": 450
    },
    {
      "epoch": 1.6328888888888888,
      "grad_norm": 0.8620548248291016,
      "learning_rate": 8.044585570001769e-07,
      "loss": 0.807,
      "step": 460
    },
    {
      "epoch": 1.6684444444444444,
      "grad_norm": 0.8659062385559082,
      "learning_rate": 7.950346003108166e-07,
      "loss": 0.8087,
      "step": 470
    },
    {
      "epoch": 1.704,
      "grad_norm": 0.5293139815330505,
      "learning_rate": 7.854470218971332e-07,
      "loss": 0.7872,
      "step": 480
    },
    {
      "epoch": 1.7395555555555555,
      "grad_norm": 0.5208423733711243,
      "learning_rate": 7.75701138885018e-07,
      "loss": 0.8161,
      "step": 490
    },
    {
      "epoch": 1.775111111111111,
      "grad_norm": 0.7580987811088562,
      "learning_rate": 7.658023561936966e-07,
      "loss": 0.8314,
      "step": 500
    },
    {
      "epoch": 1.8106666666666666,
      "grad_norm": 0.8971360325813293,
      "learning_rate": 7.557561635382432e-07,
      "loss": 0.806,
      "step": 510
    },
    {
      "epoch": 1.8462222222222222,
      "grad_norm": 0.6375018954277039,
      "learning_rate": 7.455681323850668e-07,
      "loss": 0.7969,
      "step": 520
    },
    {
      "epoch": 1.8817777777777778,
      "grad_norm": 1.017171859741211,
      "learning_rate": 7.352439128620609e-07,
      "loss": 0.7974,
      "step": 530
    },
    {
      "epoch": 1.9173333333333333,
      "grad_norm": 0.8392543196678162,
      "learning_rate": 7.247892306251275e-07,
      "loss": 0.807,
      "step": 540
    },
    {
      "epoch": 1.952888888888889,
      "grad_norm": 1.016851782798767,
      "learning_rate": 7.142098836828161e-07,
      "loss": 0.8062,
      "step": 550
    },
    {
      "epoch": 1.9884444444444445,
      "grad_norm": 0.8153456449508667,
      "learning_rate": 7.035117391808341e-07,
      "loss": 0.7673,
      "step": 560
    },
    {
      "epoch": 2.021333333333333,
      "grad_norm": 0.7162724733352661,
      "learning_rate": 6.927007301482186e-07,
      "loss": 0.7502,
      "step": 570
    },
    {
      "epoch": 2.056888888888889,
      "grad_norm": 0.9724966883659363,
      "learning_rate": 6.817828522069667e-07,
      "loss": 0.7868,
      "step": 580
    },
    {
      "epoch": 2.0924444444444443,
      "grad_norm": 1.1692003011703491,
      "learning_rate": 6.707641602469553e-07,
      "loss": 0.7739,
      "step": 590
    },
    {
      "epoch": 2.128,
      "grad_norm": 0.7322782874107361,
      "learning_rate": 6.596507650679899e-07,
      "loss": 0.7829,
      "step": 600
    },
    {
      "epoch": 2.1635555555555555,
      "grad_norm": 0.9158796072006226,
      "learning_rate": 6.484488299908486e-07,
      "loss": 0.772,
      "step": 610
    },
    {
      "epoch": 2.1991111111111112,
      "grad_norm": 0.8015128374099731,
      "learning_rate": 6.371645674391966e-07,
      "loss": 0.7806,
      "step": 620
    },
    {
      "epoch": 2.2346666666666666,
      "grad_norm": 0.7846320271492004,
      "learning_rate": 6.258042354942707e-07,
      "loss": 0.775,
      "step": 630
    },
    {
      "epoch": 2.2702222222222224,
      "grad_norm": 0.8747680187225342,
      "learning_rate": 6.143741344242423e-07,
      "loss": 0.7837,
      "step": 640
    },
    {
      "epoch": 2.3057777777777777,
      "grad_norm": 0.8119185566902161,
      "learning_rate": 6.028806031901829e-07,
      "loss": 0.7519,
      "step": 650
    },
    {
      "epoch": 2.3413333333333335,
      "grad_norm": 0.8647979497909546,
      "learning_rate": 5.91330015930574e-07,
      "loss": 0.7715,
      "step": 660
    },
    {
      "epoch": 2.376888888888889,
      "grad_norm": 0.8015746474266052,
      "learning_rate": 5.797287784263046e-07,
      "loss": 0.7829,
      "step": 670
    },
    {
      "epoch": 2.4124444444444446,
      "grad_norm": 0.715522289276123,
      "learning_rate": 5.680833245481234e-07,
      "loss": 0.7719,
      "step": 680
    },
    {
      "epoch": 2.448,
      "grad_norm": 0.9125120639801025,
      "learning_rate": 5.564001126885105e-07,
      "loss": 0.7632,
      "step": 690
    },
    {
      "epoch": 2.4835555555555557,
      "grad_norm": 0.9937298893928528,
      "learning_rate": 5.446856221799514e-07,
      "loss": 0.7511,
      "step": 700
    },
    {
      "epoch": 2.519111111111111,
      "grad_norm": 0.5765209794044495,
      "learning_rate": 5.329463497015968e-07,
      "loss": 0.7581,
      "step": 710
    },
    {
      "epoch": 2.554666666666667,
      "grad_norm": 0.841436505317688,
      "learning_rate": 5.211888056763029e-07,
      "loss": 0.7813,
      "step": 720
    },
    {
      "epoch": 2.590222222222222,
      "grad_norm": 1.1379077434539795,
      "learning_rate": 5.094195106600489e-07,
      "loss": 0.7874,
      "step": 730
    },
    {
      "epoch": 2.6257777777777775,
      "grad_norm": 0.7455689311027527,
      "learning_rate": 4.976449917257365e-07,
      "loss": 0.797,
      "step": 740
    },
    {
      "epoch": 2.6613333333333333,
      "grad_norm": 0.6947171092033386,
      "learning_rate": 4.858717788433725e-07,
      "loss": 0.7531,
      "step": 750
    },
    {
      "epoch": 2.696888888888889,
      "grad_norm": 0.8182320594787598,
      "learning_rate": 4.741064012586478e-07,
      "loss": 0.7659,
      "step": 760
    },
    {
      "epoch": 2.7324444444444445,
      "grad_norm": 0.8583469390869141,
      "learning_rate": 4.6235538387191507e-07,
      "loss": 0.753,
      "step": 770
    },
    {
      "epoch": 2.768,
      "grad_norm": 0.6977065205574036,
      "learning_rate": 4.50625243619579e-07,
      "loss": 0.7786,
      "step": 780
    },
    {
      "epoch": 2.8035555555555556,
      "grad_norm": 0.8603796362876892,
      "learning_rate": 4.3892248585990147e-07,
      "loss": 0.7842,
      "step": 790
    },
    {
      "epoch": 2.8391111111111114,
      "grad_norm": 0.6347509026527405,
      "learning_rate": 4.27253600765228e-07,
      "loss": 0.7808,
      "step": 800
    },
    {
      "epoch": 2.8746666666666667,
      "grad_norm": 0.6170427203178406,
      "learning_rate": 4.1562505972263726e-07,
      "loss": 0.7623,
      "step": 810
    },
    {
      "epoch": 2.910222222222222,
      "grad_norm": 0.6599701046943665,
      "learning_rate": 4.0404331174500656e-07,
      "loss": 0.7692,
      "step": 820
    },
    {
      "epoch": 2.945777777777778,
      "grad_norm": 0.6815395951271057,
      "learning_rate": 3.9251477989448795e-07,
      "loss": 0.8188,
      "step": 830
    },
    {
      "epoch": 2.981333333333333,
      "grad_norm": 0.5231301784515381,
      "learning_rate": 3.810458577203749e-07,
      "loss": 0.7577,
      "step": 840
    },
    {
      "epoch": 3.014222222222222,
      "grad_norm": 0.6689186692237854,
      "learning_rate": 3.696429057133358e-07,
      "loss": 0.715,
      "step": 850
    },
    {
      "epoch": 3.049777777777778,
      "grad_norm": 0.7008723020553589,
      "learning_rate": 3.583122477779834e-07,
      "loss": 0.782,
      "step": 860
    },
    {
      "epoch": 3.0853333333333333,
      "grad_norm": 0.915671706199646,
      "learning_rate": 3.470601677257323e-07,
      "loss": 0.8049,
      "step": 870
    },
    {
      "epoch": 3.120888888888889,
      "grad_norm": 0.6437973976135254,
      "learning_rate": 3.3589290578989213e-07,
      "loss": 0.7404,
      "step": 880
    },
    {
      "epoch": 3.1564444444444444,
      "grad_norm": 0.6364536285400391,
      "learning_rate": 3.2481665516492876e-07,
      "loss": 0.7662,
      "step": 890
    },
    {
      "epoch": 3.192,
      "grad_norm": 0.7271984219551086,
      "learning_rate": 3.138375585718125e-07,
      "loss": 0.7738,
      "step": 900
    },
    {
      "epoch": 3.2275555555555555,
      "grad_norm": 0.6700648665428162,
      "learning_rate": 3.0296170485135784e-07,
      "loss": 0.735,
      "step": 910
    },
    {
      "epoch": 3.2631111111111113,
      "grad_norm": 0.6754481196403503,
      "learning_rate": 2.9219512558744486e-07,
      "loss": 0.7539,
      "step": 920
    },
    {
      "epoch": 3.2986666666666666,
      "grad_norm": 0.8119938969612122,
      "learning_rate": 2.815437917619932e-07,
      "loss": 0.7498,
      "step": 930
    },
    {
      "epoch": 3.3342222222222224,
      "grad_norm": 0.5352524518966675,
      "learning_rate": 2.7101361044354696e-07,
      "loss": 0.7316,
      "step": 940
    },
    {
      "epoch": 3.3697777777777778,
      "grad_norm": 0.7653639316558838,
      "learning_rate": 2.6061042151130323e-07,
      "loss": 0.73,
      "step": 950
    },
    {
      "epoch": 3.405333333333333,
      "grad_norm": 0.7560474872589111,
      "learning_rate": 2.5033999441640344e-07,
      "loss": 0.7561,
      "step": 960
    },
    {
      "epoch": 3.440888888888889,
      "grad_norm": 0.7517653703689575,
      "learning_rate": 2.4020802498228334e-07,
      "loss": 0.7382,
      "step": 970
    },
    {
      "epoch": 3.4764444444444447,
      "grad_norm": 1.0488708019256592,
      "learning_rate": 2.3022013224585519e-07,
      "loss": 0.7805,
      "step": 980
    },
    {
      "epoch": 3.512,
      "grad_norm": 0.8792369365692139,
      "learning_rate": 2.203818553412757e-07,
      "loss": 0.7754,
      "step": 990
    },
    {
      "epoch": 3.5475555555555554,
      "grad_norm": 0.6874270439147949,
      "learning_rate": 2.10698650428025e-07,
      "loss": 0.7465,
      "step": 1000
    },
    {
      "epoch": 3.583111111111111,
      "grad_norm": 0.7939172983169556,
      "learning_rate": 2.011758876650037e-07,
      "loss": 0.7451,
      "step": 1010
    },
    {
      "epoch": 3.618666666666667,
      "grad_norm": 0.7084336876869202,
      "learning_rate": 1.9181884823232413e-07,
      "loss": 0.7559,
      "step": 1020
    },
    {
      "epoch": 3.6542222222222223,
      "grad_norm": 0.6327200531959534,
      "learning_rate": 1.82632721402448e-07,
      "loss": 0.7191,
      "step": 1030
    },
    {
      "epoch": 3.6897777777777776,
      "grad_norm": 0.5157420635223389,
      "learning_rate": 1.7362260166229308e-07,
      "loss": 0.7336,
      "step": 1040
    },
    {
      "epoch": 3.7253333333333334,
      "grad_norm": 0.5553033947944641,
      "learning_rate": 1.6479348588791e-07,
      "loss": 0.7527,
      "step": 1050
    },
    {
      "epoch": 3.7608888888888887,
      "grad_norm": 0.7045750617980957,
      "learning_rate": 1.561502705732883e-07,
      "loss": 0.7352,
      "step": 1060
    },
    {
      "epoch": 3.7964444444444445,
      "grad_norm": 0.70656418800354,
      "learning_rate": 1.4769774911483686e-07,
      "loss": 0.7666,
      "step": 1070
    },
    {
      "epoch": 3.832,
      "grad_norm": 0.8279157876968384,
      "learning_rate": 1.394406091530367e-07,
      "loss": 0.7362,
      "step": 1080
    },
    {
      "epoch": 3.8675555555555556,
      "grad_norm": 0.7268490195274353,
      "learning_rate": 1.313834299727488e-07,
      "loss": 0.7346,
      "step": 1090
    },
    {
      "epoch": 3.903111111111111,
      "grad_norm": 0.5250927209854126,
      "learning_rate": 1.2353067996361033e-07,
      "loss": 0.7359,
      "step": 1100
    }
  ],
  "logging_steps": 10,
  "max_steps": 1405,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 5,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 6.602096798242701e+18,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}