| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.0, |
| "eval_steps": 500, |
| "global_step": 268, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.007511737089201878, |
| "grad_norm": 1.5546875, |
| "learning_rate": 5e-05, |
| "loss": 0.7059, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.015023474178403756, |
| "grad_norm": 2.046875, |
| "learning_rate": 4.996268656716418e-05, |
| "loss": 0.8614, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.022535211267605635, |
| "grad_norm": 14.375, |
| "learning_rate": 4.992537313432836e-05, |
| "loss": 0.799, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.03004694835680751, |
| "grad_norm": 2.421875, |
| "learning_rate": 4.988805970149254e-05, |
| "loss": 0.7599, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.03755868544600939, |
| "grad_norm": 1.7890625, |
| "learning_rate": 4.985074626865672e-05, |
| "loss": 0.7254, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.04507042253521127, |
| "grad_norm": 1.4375, |
| "learning_rate": 4.98134328358209e-05, |
| "loss": 0.7982, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.05258215962441314, |
| "grad_norm": 1.546875, |
| "learning_rate": 4.977611940298508e-05, |
| "loss": 0.7192, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.06009389671361502, |
| "grad_norm": 2.71875, |
| "learning_rate": 4.973880597014925e-05, |
| "loss": 0.8359, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.0676056338028169, |
| "grad_norm": 1.4140625, |
| "learning_rate": 4.9701492537313436e-05, |
| "loss": 0.6135, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.07511737089201878, |
| "grad_norm": 3.96875, |
| "learning_rate": 4.966417910447762e-05, |
| "loss": 1.0456, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.08262910798122065, |
| "grad_norm": 1.6171875, |
| "learning_rate": 4.9626865671641794e-05, |
| "loss": 0.6937, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.09014084507042254, |
| "grad_norm": 1.3828125, |
| "learning_rate": 4.9589552238805977e-05, |
| "loss": 0.6935, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.09765258215962441, |
| "grad_norm": 1.7265625, |
| "learning_rate": 4.955223880597015e-05, |
| "loss": 0.9582, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.10516431924882629, |
| "grad_norm": 1.6015625, |
| "learning_rate": 4.951492537313433e-05, |
| "loss": 0.6369, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.11267605633802817, |
| "grad_norm": 1.4453125, |
| "learning_rate": 4.9477611940298504e-05, |
| "loss": 0.8523, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.12018779342723004, |
| "grad_norm": 1.234375, |
| "learning_rate": 4.944029850746269e-05, |
| "loss": 0.7572, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.12769953051643193, |
| "grad_norm": 1.6171875, |
| "learning_rate": 4.940298507462687e-05, |
| "loss": 0.8729, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.1352112676056338, |
| "grad_norm": 1.4296875, |
| "learning_rate": 4.9365671641791045e-05, |
| "loss": 0.7657, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.14272300469483568, |
| "grad_norm": 1.5546875, |
| "learning_rate": 4.932835820895523e-05, |
| "loss": 0.7701, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.15023474178403756, |
| "grad_norm": 1.421875, |
| "learning_rate": 4.92910447761194e-05, |
| "loss": 0.8063, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.15774647887323945, |
| "grad_norm": 1.203125, |
| "learning_rate": 4.9253731343283586e-05, |
| "loss": 0.6274, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.1652582159624413, |
| "grad_norm": 1.0234375, |
| "learning_rate": 4.921641791044777e-05, |
| "loss": 0.6862, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.1727699530516432, |
| "grad_norm": 1.3203125, |
| "learning_rate": 4.9179104477611944e-05, |
| "loss": 0.8265, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.18028169014084508, |
| "grad_norm": 1.3671875, |
| "learning_rate": 4.914179104477612e-05, |
| "loss": 0.6659, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.18779342723004694, |
| "grad_norm": 1.34375, |
| "learning_rate": 4.91044776119403e-05, |
| "loss": 0.6795, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.19530516431924883, |
| "grad_norm": 3.734375, |
| "learning_rate": 4.906716417910448e-05, |
| "loss": 0.7914, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.2028169014084507, |
| "grad_norm": 1.578125, |
| "learning_rate": 4.902985074626866e-05, |
| "loss": 0.7915, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.21032863849765257, |
| "grad_norm": 1.078125, |
| "learning_rate": 4.899253731343284e-05, |
| "loss": 0.6433, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.21784037558685446, |
| "grad_norm": 1.2265625, |
| "learning_rate": 4.895522388059702e-05, |
| "loss": 0.8444, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.22535211267605634, |
| "grad_norm": 1.328125, |
| "learning_rate": 4.8917910447761195e-05, |
| "loss": 0.7353, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.23286384976525823, |
| "grad_norm": 1.0859375, |
| "learning_rate": 4.888059701492538e-05, |
| "loss": 0.6133, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.2403755868544601, |
| "grad_norm": 1.4375, |
| "learning_rate": 4.884328358208955e-05, |
| "loss": 0.671, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.24788732394366197, |
| "grad_norm": 1.1484375, |
| "learning_rate": 4.8805970149253735e-05, |
| "loss": 0.7712, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.25539906103286386, |
| "grad_norm": 1.4765625, |
| "learning_rate": 4.876865671641792e-05, |
| "loss": 0.8207, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.26291079812206575, |
| "grad_norm": 1.28125, |
| "learning_rate": 4.8731343283582094e-05, |
| "loss": 0.7435, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.2704225352112676, |
| "grad_norm": 1.171875, |
| "learning_rate": 4.869402985074627e-05, |
| "loss": 0.7014, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.27793427230046946, |
| "grad_norm": 1.1875, |
| "learning_rate": 4.8656716417910445e-05, |
| "loss": 0.7946, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.28544600938967135, |
| "grad_norm": 1.6328125, |
| "learning_rate": 4.861940298507463e-05, |
| "loss": 0.5345, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.29295774647887324, |
| "grad_norm": 2.859375, |
| "learning_rate": 4.858208955223881e-05, |
| "loss": 0.8832, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.3004694835680751, |
| "grad_norm": 2.09375, |
| "learning_rate": 4.8544776119402986e-05, |
| "loss": 0.8591, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.307981220657277, |
| "grad_norm": 1.21875, |
| "learning_rate": 4.850746268656717e-05, |
| "loss": 0.6557, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.3154929577464789, |
| "grad_norm": 0.984375, |
| "learning_rate": 4.8470149253731344e-05, |
| "loss": 0.715, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.32300469483568073, |
| "grad_norm": 0.9453125, |
| "learning_rate": 4.843283582089552e-05, |
| "loss": 0.6608, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.3305164319248826, |
| "grad_norm": 1.5703125, |
| "learning_rate": 4.83955223880597e-05, |
| "loss": 0.5904, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.3380281690140845, |
| "grad_norm": 2.703125, |
| "learning_rate": 4.8358208955223885e-05, |
| "loss": 0.8645, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.3455399061032864, |
| "grad_norm": 5.84375, |
| "learning_rate": 4.832089552238806e-05, |
| "loss": 0.9412, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.3530516431924883, |
| "grad_norm": 1.2734375, |
| "learning_rate": 4.8283582089552244e-05, |
| "loss": 0.8767, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.36056338028169016, |
| "grad_norm": 1.296875, |
| "learning_rate": 4.824626865671642e-05, |
| "loss": 0.8197, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.36807511737089205, |
| "grad_norm": 1.0625, |
| "learning_rate": 4.8208955223880595e-05, |
| "loss": 0.6598, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.3755868544600939, |
| "grad_norm": 4.0, |
| "learning_rate": 4.817164179104478e-05, |
| "loss": 1.1535, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.38309859154929576, |
| "grad_norm": 1.09375, |
| "learning_rate": 4.813432835820896e-05, |
| "loss": 0.6635, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.39061032863849765, |
| "grad_norm": 1.3125, |
| "learning_rate": 4.8097014925373136e-05, |
| "loss": 0.6979, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.39812206572769954, |
| "grad_norm": 1.2734375, |
| "learning_rate": 4.805970149253732e-05, |
| "loss": 0.7167, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.4056338028169014, |
| "grad_norm": 1.1640625, |
| "learning_rate": 4.8022388059701494e-05, |
| "loss": 0.8033, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.4131455399061033, |
| "grad_norm": 1.421875, |
| "learning_rate": 4.798507462686567e-05, |
| "loss": 0.8736, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.42065727699530514, |
| "grad_norm": 1.2890625, |
| "learning_rate": 4.794776119402985e-05, |
| "loss": 0.8361, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.428169014084507, |
| "grad_norm": 1.1328125, |
| "learning_rate": 4.7910447761194035e-05, |
| "loss": 0.6386, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.4356807511737089, |
| "grad_norm": 1.2421875, |
| "learning_rate": 4.787313432835821e-05, |
| "loss": 0.7318, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.4431924882629108, |
| "grad_norm": 1.4375, |
| "learning_rate": 4.7835820895522394e-05, |
| "loss": 0.7141, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.4507042253521127, |
| "grad_norm": 1.0859375, |
| "learning_rate": 4.779850746268657e-05, |
| "loss": 0.6612, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.4582159624413146, |
| "grad_norm": 1.109375, |
| "learning_rate": 4.7761194029850745e-05, |
| "loss": 0.8155, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.46572769953051646, |
| "grad_norm": 1.0546875, |
| "learning_rate": 4.772388059701493e-05, |
| "loss": 0.8123, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.4732394366197183, |
| "grad_norm": 1.1640625, |
| "learning_rate": 4.768656716417911e-05, |
| "loss": 0.6706, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.4807511737089202, |
| "grad_norm": 1.25, |
| "learning_rate": 4.7649253731343286e-05, |
| "loss": 0.8376, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.48826291079812206, |
| "grad_norm": 0.98828125, |
| "learning_rate": 4.761194029850746e-05, |
| "loss": 0.7736, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.49577464788732395, |
| "grad_norm": 1.296875, |
| "learning_rate": 4.7574626865671644e-05, |
| "loss": 0.777, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.5032863849765258, |
| "grad_norm": 1.1328125, |
| "learning_rate": 4.753731343283582e-05, |
| "loss": 0.7517, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.5107981220657277, |
| "grad_norm": 1.2578125, |
| "learning_rate": 4.75e-05, |
| "loss": 0.8422, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.5183098591549296, |
| "grad_norm": 1.2109375, |
| "learning_rate": 4.7462686567164185e-05, |
| "loss": 0.6369, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.5258215962441315, |
| "grad_norm": 1.0, |
| "learning_rate": 4.742537313432836e-05, |
| "loss": 0.6823, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.5333333333333333, |
| "grad_norm": 1.1953125, |
| "learning_rate": 4.738805970149254e-05, |
| "loss": 0.6799, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.5408450704225352, |
| "grad_norm": 1.1640625, |
| "learning_rate": 4.735074626865672e-05, |
| "loss": 0.6477, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.5483568075117371, |
| "grad_norm": 1.125, |
| "learning_rate": 4.7313432835820895e-05, |
| "loss": 0.7116, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.5558685446009389, |
| "grad_norm": 1.0390625, |
| "learning_rate": 4.727611940298508e-05, |
| "loss": 0.6501, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.5633802816901409, |
| "grad_norm": 1.203125, |
| "learning_rate": 4.723880597014926e-05, |
| "loss": 0.8061, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.5708920187793427, |
| "grad_norm": 1.203125, |
| "learning_rate": 4.7201492537313436e-05, |
| "loss": 0.7363, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.5784037558685446, |
| "grad_norm": 1.15625, |
| "learning_rate": 4.716417910447761e-05, |
| "loss": 0.7949, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.5859154929577465, |
| "grad_norm": 1.25, |
| "learning_rate": 4.7126865671641794e-05, |
| "loss": 0.7815, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.5934272300469483, |
| "grad_norm": 1.40625, |
| "learning_rate": 4.708955223880597e-05, |
| "loss": 0.7815, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.6009389671361502, |
| "grad_norm": 1.1796875, |
| "learning_rate": 4.705223880597015e-05, |
| "loss": 0.7299, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.6084507042253521, |
| "grad_norm": 1.25, |
| "learning_rate": 4.7014925373134335e-05, |
| "loss": 0.5975, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.615962441314554, |
| "grad_norm": 1.8984375, |
| "learning_rate": 4.697761194029851e-05, |
| "loss": 0.8056, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.6234741784037559, |
| "grad_norm": 1.3671875, |
| "learning_rate": 4.6940298507462687e-05, |
| "loss": 0.8034, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.6309859154929578, |
| "grad_norm": 1.078125, |
| "learning_rate": 4.690298507462687e-05, |
| "loss": 0.6472, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.6384976525821596, |
| "grad_norm": 1.328125, |
| "learning_rate": 4.6865671641791045e-05, |
| "loss": 0.8561, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.6460093896713615, |
| "grad_norm": 2.90625, |
| "learning_rate": 4.682835820895523e-05, |
| "loss": 0.8774, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.6535211267605634, |
| "grad_norm": 1.1484375, |
| "learning_rate": 4.67910447761194e-05, |
| "loss": 0.6865, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.6610328638497652, |
| "grad_norm": 1.53125, |
| "learning_rate": 4.6753731343283586e-05, |
| "loss": 0.798, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.6685446009389672, |
| "grad_norm": 1.1875, |
| "learning_rate": 4.671641791044776e-05, |
| "loss": 0.7587, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.676056338028169, |
| "grad_norm": 1.25, |
| "learning_rate": 4.667910447761194e-05, |
| "loss": 0.6989, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.6835680751173709, |
| "grad_norm": 1.0546875, |
| "learning_rate": 4.664179104477612e-05, |
| "loss": 0.639, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.6910798122065728, |
| "grad_norm": 1.453125, |
| "learning_rate": 4.66044776119403e-05, |
| "loss": 0.6844, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.6985915492957746, |
| "grad_norm": 1.03125, |
| "learning_rate": 4.656716417910448e-05, |
| "loss": 0.7513, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.7061032863849765, |
| "grad_norm": 1.015625, |
| "learning_rate": 4.652985074626866e-05, |
| "loss": 0.6264, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.7136150234741784, |
| "grad_norm": 1.21875, |
| "learning_rate": 4.6492537313432837e-05, |
| "loss": 0.7228, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.7211267605633803, |
| "grad_norm": 1.0078125, |
| "learning_rate": 4.645522388059701e-05, |
| "loss": 0.5355, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.7286384976525822, |
| "grad_norm": 1.359375, |
| "learning_rate": 4.6417910447761195e-05, |
| "loss": 0.7717, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.7361502347417841, |
| "grad_norm": 1.1328125, |
| "learning_rate": 4.638059701492538e-05, |
| "loss": 0.7365, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.7436619718309859, |
| "grad_norm": 3.375, |
| "learning_rate": 4.634328358208955e-05, |
| "loss": 0.8382, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.7511737089201878, |
| "grad_norm": 1.421875, |
| "learning_rate": 4.6305970149253736e-05, |
| "loss": 0.7674, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.7586854460093897, |
| "grad_norm": 1.7265625, |
| "learning_rate": 4.626865671641791e-05, |
| "loss": 0.554, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.7661971830985915, |
| "grad_norm": 1.25, |
| "learning_rate": 4.623134328358209e-05, |
| "loss": 0.7921, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.7737089201877935, |
| "grad_norm": 1.2734375, |
| "learning_rate": 4.6194029850746277e-05, |
| "loss": 0.6282, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.7812206572769953, |
| "grad_norm": 1.171875, |
| "learning_rate": 4.615671641791045e-05, |
| "loss": 0.7992, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.7887323943661971, |
| "grad_norm": 1.0859375, |
| "learning_rate": 4.611940298507463e-05, |
| "loss": 0.7313, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.7962441314553991, |
| "grad_norm": 0.9921875, |
| "learning_rate": 4.608208955223881e-05, |
| "loss": 0.7154, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.8037558685446009, |
| "grad_norm": 1.0859375, |
| "learning_rate": 4.6044776119402986e-05, |
| "loss": 0.6904, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.8112676056338028, |
| "grad_norm": 1.125, |
| "learning_rate": 4.600746268656716e-05, |
| "loss": 0.7075, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.8187793427230047, |
| "grad_norm": 2.609375, |
| "learning_rate": 4.5970149253731345e-05, |
| "loss": 0.7362, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.8262910798122066, |
| "grad_norm": 1.125, |
| "learning_rate": 4.593283582089553e-05, |
| "loss": 0.6568, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.8338028169014085, |
| "grad_norm": 1.3984375, |
| "learning_rate": 4.58955223880597e-05, |
| "loss": 0.7508, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.8413145539906103, |
| "grad_norm": 1.078125, |
| "learning_rate": 4.585820895522388e-05, |
| "loss": 0.8402, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.8488262910798122, |
| "grad_norm": 1.2734375, |
| "learning_rate": 4.582089552238806e-05, |
| "loss": 0.6946, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.856338028169014, |
| "grad_norm": 1.03125, |
| "learning_rate": 4.578358208955224e-05, |
| "loss": 0.6877, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.863849765258216, |
| "grad_norm": 1.2890625, |
| "learning_rate": 4.574626865671642e-05, |
| "loss": 0.6703, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.8713615023474178, |
| "grad_norm": 1.2578125, |
| "learning_rate": 4.57089552238806e-05, |
| "loss": 0.6673, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.8788732394366198, |
| "grad_norm": 1.078125, |
| "learning_rate": 4.567164179104478e-05, |
| "loss": 0.8, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.8863849765258216, |
| "grad_norm": 1.7734375, |
| "learning_rate": 4.5634328358208954e-05, |
| "loss": 0.7146, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.8938967136150234, |
| "grad_norm": 1.1953125, |
| "learning_rate": 4.5597014925373136e-05, |
| "loss": 0.7342, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.9014084507042254, |
| "grad_norm": 1.390625, |
| "learning_rate": 4.555970149253732e-05, |
| "loss": 0.865, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.9089201877934272, |
| "grad_norm": 1.171875, |
| "learning_rate": 4.5522388059701495e-05, |
| "loss": 0.8743, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.9164319248826291, |
| "grad_norm": 1.0625, |
| "learning_rate": 4.548507462686568e-05, |
| "loss": 0.8124, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.923943661971831, |
| "grad_norm": 1.0703125, |
| "learning_rate": 4.544776119402985e-05, |
| "loss": 0.739, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.9314553990610329, |
| "grad_norm": 1.109375, |
| "learning_rate": 4.541044776119403e-05, |
| "loss": 0.9343, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.9389671361502347, |
| "grad_norm": 1.21875, |
| "learning_rate": 4.537313432835821e-05, |
| "loss": 0.8239, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.9464788732394366, |
| "grad_norm": 1.125, |
| "learning_rate": 4.5335820895522394e-05, |
| "loss": 0.5928, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.9539906103286385, |
| "grad_norm": 1.140625, |
| "learning_rate": 4.529850746268657e-05, |
| "loss": 0.6705, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.9615023474178404, |
| "grad_norm": 1.5390625, |
| "learning_rate": 4.526119402985075e-05, |
| "loss": 0.7937, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.9690140845070423, |
| "grad_norm": 1.0703125, |
| "learning_rate": 4.522388059701493e-05, |
| "loss": 0.7819, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.9765258215962441, |
| "grad_norm": 1.2265625, |
| "learning_rate": 4.5186567164179104e-05, |
| "loss": 0.7217, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.984037558685446, |
| "grad_norm": 0.97265625, |
| "learning_rate": 4.5149253731343286e-05, |
| "loss": 0.5954, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.9915492957746479, |
| "grad_norm": 1.171875, |
| "learning_rate": 4.511194029850747e-05, |
| "loss": 0.6472, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.9990610328638497, |
| "grad_norm": 1.078125, |
| "learning_rate": 4.5074626865671645e-05, |
| "loss": 0.6715, |
| "step": 133 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 2.765625, |
| "learning_rate": 4.503731343283582e-05, |
| "loss": 0.7686, |
| "step": 134 |
| }, |
| { |
| "epoch": 1.007511737089202, |
| "grad_norm": 1.625, |
| "learning_rate": 4.5e-05, |
| "loss": 0.6494, |
| "step": 135 |
| }, |
| { |
| "epoch": 1.0150234741784037, |
| "grad_norm": 1.46875, |
| "learning_rate": 4.496268656716418e-05, |
| "loss": 0.5652, |
| "step": 136 |
| }, |
| { |
| "epoch": 1.0225352112676056, |
| "grad_norm": 0.9296875, |
| "learning_rate": 4.492537313432836e-05, |
| "loss": 0.5451, |
| "step": 137 |
| }, |
| { |
| "epoch": 1.0300469483568075, |
| "grad_norm": 1.1171875, |
| "learning_rate": 4.4888059701492544e-05, |
| "loss": 0.7535, |
| "step": 138 |
| }, |
| { |
| "epoch": 1.0375586854460095, |
| "grad_norm": 1.328125, |
| "learning_rate": 4.485074626865672e-05, |
| "loss": 0.6217, |
| "step": 139 |
| }, |
| { |
| "epoch": 1.0450704225352112, |
| "grad_norm": 1.1953125, |
| "learning_rate": 4.4813432835820895e-05, |
| "loss": 0.6696, |
| "step": 140 |
| }, |
| { |
| "epoch": 1.0525821596244131, |
| "grad_norm": 1.59375, |
| "learning_rate": 4.477611940298508e-05, |
| "loss": 0.5071, |
| "step": 141 |
| }, |
| { |
| "epoch": 1.060093896713615, |
| "grad_norm": 1.4921875, |
| "learning_rate": 4.4738805970149254e-05, |
| "loss": 0.5824, |
| "step": 142 |
| }, |
| { |
| "epoch": 1.0676056338028168, |
| "grad_norm": 1.1796875, |
| "learning_rate": 4.4701492537313436e-05, |
| "loss": 0.4513, |
| "step": 143 |
| }, |
| { |
| "epoch": 1.0751173708920188, |
| "grad_norm": 1.3671875, |
| "learning_rate": 4.466417910447762e-05, |
| "loss": 0.6003, |
| "step": 144 |
| }, |
| { |
| "epoch": 1.0826291079812207, |
| "grad_norm": 1.3515625, |
| "learning_rate": 4.4626865671641794e-05, |
| "loss": 0.6236, |
| "step": 145 |
| }, |
| { |
| "epoch": 1.0901408450704226, |
| "grad_norm": 1.0859375, |
| "learning_rate": 4.458955223880597e-05, |
| "loss": 0.5397, |
| "step": 146 |
| }, |
| { |
| "epoch": 1.0976525821596244, |
| "grad_norm": 2.0625, |
| "learning_rate": 4.455223880597015e-05, |
| "loss": 0.8009, |
| "step": 147 |
| }, |
| { |
| "epoch": 1.1051643192488263, |
| "grad_norm": 1.3125, |
| "learning_rate": 4.451492537313433e-05, |
| "loss": 0.7748, |
| "step": 148 |
| }, |
| { |
| "epoch": 1.1126760563380282, |
| "grad_norm": 3.421875, |
| "learning_rate": 4.447761194029851e-05, |
| "loss": 0.7256, |
| "step": 149 |
| }, |
| { |
| "epoch": 1.12018779342723, |
| "grad_norm": 1.296875, |
| "learning_rate": 4.4440298507462694e-05, |
| "loss": 0.6101, |
| "step": 150 |
| }, |
| { |
| "epoch": 1.127699530516432, |
| "grad_norm": 1.109375, |
| "learning_rate": 4.440298507462687e-05, |
| "loss": 0.4897, |
| "step": 151 |
| }, |
| { |
| "epoch": 1.1352112676056338, |
| "grad_norm": 1.0703125, |
| "learning_rate": 4.4365671641791045e-05, |
| "loss": 0.7608, |
| "step": 152 |
| }, |
| { |
| "epoch": 1.1427230046948358, |
| "grad_norm": 1.1171875, |
| "learning_rate": 4.432835820895523e-05, |
| "loss": 0.6389, |
| "step": 153 |
| }, |
| { |
| "epoch": 1.1502347417840375, |
| "grad_norm": 0.98828125, |
| "learning_rate": 4.4291044776119403e-05, |
| "loss": 0.4912, |
| "step": 154 |
| }, |
| { |
| "epoch": 1.1577464788732394, |
| "grad_norm": 1.1171875, |
| "learning_rate": 4.4253731343283586e-05, |
| "loss": 0.6865, |
| "step": 155 |
| }, |
| { |
| "epoch": 1.1652582159624414, |
| "grad_norm": 1.0078125, |
| "learning_rate": 4.421641791044777e-05, |
| "loss": 0.4687, |
| "step": 156 |
| }, |
| { |
| "epoch": 1.172769953051643, |
| "grad_norm": 1.6484375, |
| "learning_rate": 4.4179104477611944e-05, |
| "loss": 0.6835, |
| "step": 157 |
| }, |
| { |
| "epoch": 1.180281690140845, |
| "grad_norm": 1.015625, |
| "learning_rate": 4.414179104477612e-05, |
| "loss": 0.4748, |
| "step": 158 |
| }, |
| { |
| "epoch": 1.187793427230047, |
| "grad_norm": 1.1015625, |
| "learning_rate": 4.4104477611940296e-05, |
| "loss": 0.556, |
| "step": 159 |
| }, |
| { |
| "epoch": 1.1953051643192487, |
| "grad_norm": 1.0390625, |
| "learning_rate": 4.406716417910448e-05, |
| "loss": 0.4781, |
| "step": 160 |
| }, |
| { |
| "epoch": 1.2028169014084507, |
| "grad_norm": 0.87890625, |
| "learning_rate": 4.402985074626866e-05, |
| "loss": 0.4693, |
| "step": 161 |
| }, |
| { |
| "epoch": 1.2103286384976526, |
| "grad_norm": 1.1171875, |
| "learning_rate": 4.399253731343284e-05, |
| "loss": 0.5578, |
| "step": 162 |
| }, |
| { |
| "epoch": 1.2178403755868545, |
| "grad_norm": 1.2890625, |
| "learning_rate": 4.395522388059702e-05, |
| "loss": 0.6094, |
| "step": 163 |
| }, |
| { |
| "epoch": 1.2253521126760563, |
| "grad_norm": 1.671875, |
| "learning_rate": 4.3917910447761195e-05, |
| "loss": 0.6457, |
| "step": 164 |
| }, |
| { |
| "epoch": 1.2328638497652582, |
| "grad_norm": 0.96875, |
| "learning_rate": 4.388059701492537e-05, |
| "loss": 0.5279, |
| "step": 165 |
| }, |
| { |
| "epoch": 1.2403755868544601, |
| "grad_norm": 1.0234375, |
| "learning_rate": 4.384328358208955e-05, |
| "loss": 0.5519, |
| "step": 166 |
| }, |
| { |
| "epoch": 1.247887323943662, |
| "grad_norm": 0.97265625, |
| "learning_rate": 4.3805970149253736e-05, |
| "loss": 0.4328, |
| "step": 167 |
| }, |
| { |
| "epoch": 1.2553990610328638, |
| "grad_norm": 1.6484375, |
| "learning_rate": 4.376865671641791e-05, |
| "loss": 0.6765, |
| "step": 168 |
| }, |
| { |
| "epoch": 1.2629107981220657, |
| "grad_norm": 0.953125, |
| "learning_rate": 4.3731343283582094e-05, |
| "loss": 0.4776, |
| "step": 169 |
| }, |
| { |
| "epoch": 1.2704225352112677, |
| "grad_norm": 1.125, |
| "learning_rate": 4.369402985074627e-05, |
| "loss": 0.4864, |
| "step": 170 |
| }, |
| { |
| "epoch": 1.2779342723004694, |
| "grad_norm": 1.2421875, |
| "learning_rate": 4.3656716417910446e-05, |
| "loss": 0.5458, |
| "step": 171 |
| }, |
| { |
| "epoch": 1.2854460093896714, |
| "grad_norm": 1.046875, |
| "learning_rate": 4.361940298507463e-05, |
| "loss": 0.5282, |
| "step": 172 |
| }, |
| { |
| "epoch": 1.2929577464788733, |
| "grad_norm": 1.0703125, |
| "learning_rate": 4.358208955223881e-05, |
| "loss": 0.5868, |
| "step": 173 |
| }, |
| { |
| "epoch": 1.300469483568075, |
| "grad_norm": 0.94921875, |
| "learning_rate": 4.354477611940299e-05, |
| "loss": 0.5913, |
| "step": 174 |
| }, |
| { |
| "epoch": 1.307981220657277, |
| "grad_norm": 1.1953125, |
| "learning_rate": 4.350746268656717e-05, |
| "loss": 0.6012, |
| "step": 175 |
| }, |
| { |
| "epoch": 1.315492957746479, |
| "grad_norm": 1.578125, |
| "learning_rate": 4.3470149253731345e-05, |
| "loss": 0.7648, |
| "step": 176 |
| }, |
| { |
| "epoch": 1.3230046948356806, |
| "grad_norm": 1.09375, |
| "learning_rate": 4.343283582089552e-05, |
| "loss": 0.5035, |
| "step": 177 |
| }, |
| { |
| "epoch": 1.3305164319248826, |
| "grad_norm": 0.98046875, |
| "learning_rate": 4.33955223880597e-05, |
| "loss": 0.6096, |
| "step": 178 |
| }, |
| { |
| "epoch": 1.3380281690140845, |
| "grad_norm": 1.1875, |
| "learning_rate": 4.3358208955223886e-05, |
| "loss": 0.6028, |
| "step": 179 |
| }, |
| { |
| "epoch": 1.3455399061032864, |
| "grad_norm": 1.046875, |
| "learning_rate": 4.332089552238806e-05, |
| "loss": 0.4301, |
| "step": 180 |
| }, |
| { |
| "epoch": 1.3530516431924884, |
| "grad_norm": 0.875, |
| "learning_rate": 4.328358208955224e-05, |
| "loss": 0.4578, |
| "step": 181 |
| }, |
| { |
| "epoch": 1.36056338028169, |
| "grad_norm": 1.2734375, |
| "learning_rate": 4.324626865671642e-05, |
| "loss": 0.6293, |
| "step": 182 |
| }, |
| { |
| "epoch": 1.368075117370892, |
| "grad_norm": 1.09375, |
| "learning_rate": 4.3208955223880596e-05, |
| "loss": 0.6577, |
| "step": 183 |
| }, |
| { |
| "epoch": 1.375586854460094, |
| "grad_norm": 1.0, |
| "learning_rate": 4.317164179104478e-05, |
| "loss": 0.5545, |
| "step": 184 |
| }, |
| { |
| "epoch": 1.3830985915492957, |
| "grad_norm": 1.28125, |
| "learning_rate": 4.313432835820896e-05, |
| "loss": 0.6445, |
| "step": 185 |
| }, |
| { |
| "epoch": 1.3906103286384977, |
| "grad_norm": 0.9609375, |
| "learning_rate": 4.3097014925373137e-05, |
| "loss": 0.3414, |
| "step": 186 |
| }, |
| { |
| "epoch": 1.3981220657276996, |
| "grad_norm": 1.1328125, |
| "learning_rate": 4.305970149253731e-05, |
| "loss": 0.6948, |
| "step": 187 |
| }, |
| { |
| "epoch": 1.4056338028169013, |
| "grad_norm": 1.15625, |
| "learning_rate": 4.3022388059701495e-05, |
| "loss": 0.4784, |
| "step": 188 |
| }, |
| { |
| "epoch": 1.4131455399061033, |
| "grad_norm": 1.140625, |
| "learning_rate": 4.298507462686567e-05, |
| "loss": 0.665, |
| "step": 189 |
| }, |
| { |
| "epoch": 1.4206572769953052, |
| "grad_norm": 1.0703125, |
| "learning_rate": 4.294776119402985e-05, |
| "loss": 0.5881, |
| "step": 190 |
| }, |
| { |
| "epoch": 1.428169014084507, |
| "grad_norm": 0.98046875, |
| "learning_rate": 4.2910447761194036e-05, |
| "loss": 0.4298, |
| "step": 191 |
| }, |
| { |
| "epoch": 1.4356807511737089, |
| "grad_norm": 1.2578125, |
| "learning_rate": 4.287313432835821e-05, |
| "loss": 0.6543, |
| "step": 192 |
| }, |
| { |
| "epoch": 1.4431924882629108, |
| "grad_norm": 1.09375, |
| "learning_rate": 4.283582089552239e-05, |
| "loss": 0.4931, |
| "step": 193 |
| }, |
| { |
| "epoch": 1.4507042253521127, |
| "grad_norm": 1.2265625, |
| "learning_rate": 4.279850746268657e-05, |
| "loss": 0.5279, |
| "step": 194 |
| }, |
| { |
| "epoch": 1.4582159624413147, |
| "grad_norm": 1.015625, |
| "learning_rate": 4.2761194029850746e-05, |
| "loss": 0.5347, |
| "step": 195 |
| }, |
| { |
| "epoch": 1.4657276995305164, |
| "grad_norm": 1.15625, |
| "learning_rate": 4.272388059701493e-05, |
| "loss": 0.48, |
| "step": 196 |
| }, |
| { |
| "epoch": 1.4732394366197183, |
| "grad_norm": 1.25, |
| "learning_rate": 4.268656716417911e-05, |
| "loss": 0.6988, |
| "step": 197 |
| }, |
| { |
| "epoch": 1.4807511737089203, |
| "grad_norm": 1.421875, |
| "learning_rate": 4.2649253731343286e-05, |
| "loss": 0.5859, |
| "step": 198 |
| }, |
| { |
| "epoch": 1.488262910798122, |
| "grad_norm": 1.1796875, |
| "learning_rate": 4.261194029850746e-05, |
| "loss": 0.5283, |
| "step": 199 |
| }, |
| { |
| "epoch": 1.495774647887324, |
| "grad_norm": 1.0703125, |
| "learning_rate": 4.2574626865671645e-05, |
| "loss": 0.4631, |
| "step": 200 |
| }, |
| { |
| "epoch": 1.503286384976526, |
| "grad_norm": 0.9921875, |
| "learning_rate": 4.253731343283582e-05, |
| "loss": 0.5231, |
| "step": 201 |
| }, |
| { |
| "epoch": 1.5107981220657276, |
| "grad_norm": 1.0625, |
| "learning_rate": 4.25e-05, |
| "loss": 0.5583, |
| "step": 202 |
| }, |
| { |
| "epoch": 1.5183098591549296, |
| "grad_norm": 0.9453125, |
| "learning_rate": 4.2462686567164186e-05, |
| "loss": 0.4876, |
| "step": 203 |
| }, |
| { |
| "epoch": 1.5258215962441315, |
| "grad_norm": 1.2265625, |
| "learning_rate": 4.242537313432836e-05, |
| "loss": 0.4504, |
| "step": 204 |
| }, |
| { |
| "epoch": 1.5333333333333332, |
| "grad_norm": 0.93359375, |
| "learning_rate": 4.238805970149254e-05, |
| "loss": 0.4757, |
| "step": 205 |
| }, |
| { |
| "epoch": 1.5408450704225352, |
| "grad_norm": 1.0703125, |
| "learning_rate": 4.235074626865671e-05, |
| "loss": 0.5933, |
| "step": 206 |
| }, |
| { |
| "epoch": 1.548356807511737, |
| "grad_norm": 1.0078125, |
| "learning_rate": 4.2313432835820895e-05, |
| "loss": 0.5415, |
| "step": 207 |
| }, |
| { |
| "epoch": 1.5558685446009388, |
| "grad_norm": 1.1953125, |
| "learning_rate": 4.227611940298508e-05, |
| "loss": 0.7041, |
| "step": 208 |
| }, |
| { |
| "epoch": 1.563380281690141, |
| "grad_norm": 1.109375, |
| "learning_rate": 4.2238805970149254e-05, |
| "loss": 0.5753, |
| "step": 209 |
| }, |
| { |
| "epoch": 1.5708920187793427, |
| "grad_norm": 1.1640625, |
| "learning_rate": 4.2201492537313436e-05, |
| "loss": 0.552, |
| "step": 210 |
| }, |
| { |
| "epoch": 1.5784037558685446, |
| "grad_norm": 1.0, |
| "learning_rate": 4.216417910447761e-05, |
| "loss": 0.5549, |
| "step": 211 |
| }, |
| { |
| "epoch": 1.5859154929577466, |
| "grad_norm": 1.1328125, |
| "learning_rate": 4.212686567164179e-05, |
| "loss": 0.6679, |
| "step": 212 |
| }, |
| { |
| "epoch": 1.5934272300469483, |
| "grad_norm": 2.015625, |
| "learning_rate": 4.208955223880597e-05, |
| "loss": 0.796, |
| "step": 213 |
| }, |
| { |
| "epoch": 1.6009389671361502, |
| "grad_norm": 2.953125, |
| "learning_rate": 4.205223880597015e-05, |
| "loss": 0.6427, |
| "step": 214 |
| }, |
| { |
| "epoch": 1.6084507042253522, |
| "grad_norm": 0.90625, |
| "learning_rate": 4.201492537313433e-05, |
| "loss": 0.466, |
| "step": 215 |
| }, |
| { |
| "epoch": 1.615962441314554, |
| "grad_norm": 0.94140625, |
| "learning_rate": 4.197761194029851e-05, |
| "loss": 0.4242, |
| "step": 216 |
| }, |
| { |
| "epoch": 1.6234741784037559, |
| "grad_norm": 3.0, |
| "learning_rate": 4.194029850746269e-05, |
| "loss": 0.6968, |
| "step": 217 |
| }, |
| { |
| "epoch": 1.6309859154929578, |
| "grad_norm": 0.9765625, |
| "learning_rate": 4.190298507462686e-05, |
| "loss": 0.5988, |
| "step": 218 |
| }, |
| { |
| "epoch": 1.6384976525821595, |
| "grad_norm": 1.203125, |
| "learning_rate": 4.186567164179105e-05, |
| "loss": 0.4979, |
| "step": 219 |
| }, |
| { |
| "epoch": 1.6460093896713615, |
| "grad_norm": 0.83203125, |
| "learning_rate": 4.182835820895523e-05, |
| "loss": 0.3527, |
| "step": 220 |
| }, |
| { |
| "epoch": 1.6535211267605634, |
| "grad_norm": 1.09375, |
| "learning_rate": 4.1791044776119404e-05, |
| "loss": 0.5743, |
| "step": 221 |
| }, |
| { |
| "epoch": 1.6610328638497651, |
| "grad_norm": 1.3203125, |
| "learning_rate": 4.1753731343283586e-05, |
| "loss": 0.6427, |
| "step": 222 |
| }, |
| { |
| "epoch": 1.6685446009389673, |
| "grad_norm": 1.2109375, |
| "learning_rate": 4.171641791044776e-05, |
| "loss": 0.6496, |
| "step": 223 |
| }, |
| { |
| "epoch": 1.676056338028169, |
| "grad_norm": 0.8515625, |
| "learning_rate": 4.167910447761194e-05, |
| "loss": 0.3463, |
| "step": 224 |
| }, |
| { |
| "epoch": 1.683568075117371, |
| "grad_norm": 1.34375, |
| "learning_rate": 4.164179104477613e-05, |
| "loss": 0.6243, |
| "step": 225 |
| }, |
| { |
| "epoch": 1.6910798122065729, |
| "grad_norm": 0.98828125, |
| "learning_rate": 4.16044776119403e-05, |
| "loss": 0.5389, |
| "step": 226 |
| }, |
| { |
| "epoch": 1.6985915492957746, |
| "grad_norm": 1.0, |
| "learning_rate": 4.156716417910448e-05, |
| "loss": 0.5515, |
| "step": 227 |
| }, |
| { |
| "epoch": 1.7061032863849765, |
| "grad_norm": 1.2890625, |
| "learning_rate": 4.152985074626866e-05, |
| "loss": 0.6783, |
| "step": 228 |
| }, |
| { |
| "epoch": 1.7136150234741785, |
| "grad_norm": 1.0390625, |
| "learning_rate": 4.149253731343284e-05, |
| "loss": 0.5099, |
| "step": 229 |
| }, |
| { |
| "epoch": 1.7211267605633802, |
| "grad_norm": 1.078125, |
| "learning_rate": 4.145522388059702e-05, |
| "loss": 0.461, |
| "step": 230 |
| }, |
| { |
| "epoch": 1.7286384976525822, |
| "grad_norm": 1.0703125, |
| "learning_rate": 4.1417910447761195e-05, |
| "loss": 0.4953, |
| "step": 231 |
| }, |
| { |
| "epoch": 1.736150234741784, |
| "grad_norm": 1.1484375, |
| "learning_rate": 4.138059701492538e-05, |
| "loss": 0.5662, |
| "step": 232 |
| }, |
| { |
| "epoch": 1.7436619718309858, |
| "grad_norm": 1.1171875, |
| "learning_rate": 4.1343283582089554e-05, |
| "loss": 0.524, |
| "step": 233 |
| }, |
| { |
| "epoch": 1.7511737089201878, |
| "grad_norm": 1.1328125, |
| "learning_rate": 4.130597014925373e-05, |
| "loss": 0.424, |
| "step": 234 |
| }, |
| { |
| "epoch": 1.7586854460093897, |
| "grad_norm": 1.5625, |
| "learning_rate": 4.126865671641791e-05, |
| "loss": 0.5042, |
| "step": 235 |
| }, |
| { |
| "epoch": 1.7661971830985914, |
| "grad_norm": 0.9765625, |
| "learning_rate": 4.1231343283582094e-05, |
| "loss": 0.5515, |
| "step": 236 |
| }, |
| { |
| "epoch": 1.7737089201877936, |
| "grad_norm": 1.0703125, |
| "learning_rate": 4.119402985074627e-05, |
| "loss": 0.6026, |
| "step": 237 |
| }, |
| { |
| "epoch": 1.7812206572769953, |
| "grad_norm": 1.171875, |
| "learning_rate": 4.115671641791045e-05, |
| "loss": 0.6206, |
| "step": 238 |
| }, |
| { |
| "epoch": 1.788732394366197, |
| "grad_norm": 1.96875, |
| "learning_rate": 4.111940298507463e-05, |
| "loss": 0.6904, |
| "step": 239 |
| }, |
| { |
| "epoch": 1.7962441314553992, |
| "grad_norm": 1.0703125, |
| "learning_rate": 4.1082089552238804e-05, |
| "loss": 0.4693, |
| "step": 240 |
| }, |
| { |
| "epoch": 1.803755868544601, |
| "grad_norm": 0.8984375, |
| "learning_rate": 4.104477611940299e-05, |
| "loss": 0.3648, |
| "step": 241 |
| }, |
| { |
| "epoch": 1.8112676056338028, |
| "grad_norm": 1.2734375, |
| "learning_rate": 4.100746268656717e-05, |
| "loss": 0.6071, |
| "step": 242 |
| }, |
| { |
| "epoch": 1.8187793427230048, |
| "grad_norm": 1.1015625, |
| "learning_rate": 4.0970149253731345e-05, |
| "loss": 0.4936, |
| "step": 243 |
| }, |
| { |
| "epoch": 1.8262910798122065, |
| "grad_norm": 1.0625, |
| "learning_rate": 4.093283582089553e-05, |
| "loss": 0.6627, |
| "step": 244 |
| }, |
| { |
| "epoch": 1.8338028169014085, |
| "grad_norm": 1.0625, |
| "learning_rate": 4.0895522388059703e-05, |
| "loss": 0.4625, |
| "step": 245 |
| }, |
| { |
| "epoch": 1.8413145539906104, |
| "grad_norm": 1.2890625, |
| "learning_rate": 4.085820895522388e-05, |
| "loss": 0.5132, |
| "step": 246 |
| }, |
| { |
| "epoch": 1.8488262910798121, |
| "grad_norm": 0.91796875, |
| "learning_rate": 4.082089552238806e-05, |
| "loss": 0.5464, |
| "step": 247 |
| }, |
| { |
| "epoch": 1.856338028169014, |
| "grad_norm": 1.5390625, |
| "learning_rate": 4.0783582089552244e-05, |
| "loss": 0.6268, |
| "step": 248 |
| }, |
| { |
| "epoch": 1.863849765258216, |
| "grad_norm": 1.1015625, |
| "learning_rate": 4.074626865671642e-05, |
| "loss": 0.4702, |
| "step": 249 |
| }, |
| { |
| "epoch": 1.8713615023474177, |
| "grad_norm": 1.21875, |
| "learning_rate": 4.07089552238806e-05, |
| "loss": 0.6875, |
| "step": 250 |
| }, |
| { |
| "epoch": 1.8788732394366199, |
| "grad_norm": 1.828125, |
| "learning_rate": 4.067164179104478e-05, |
| "loss": 0.5994, |
| "step": 251 |
| }, |
| { |
| "epoch": 1.8863849765258216, |
| "grad_norm": 1.1015625, |
| "learning_rate": 4.0634328358208954e-05, |
| "loss": 0.513, |
| "step": 252 |
| }, |
| { |
| "epoch": 1.8938967136150233, |
| "grad_norm": 0.99609375, |
| "learning_rate": 4.059701492537314e-05, |
| "loss": 0.5234, |
| "step": 253 |
| }, |
| { |
| "epoch": 1.9014084507042255, |
| "grad_norm": 1.03125, |
| "learning_rate": 4.055970149253732e-05, |
| "loss": 0.3845, |
| "step": 254 |
| }, |
| { |
| "epoch": 1.9089201877934272, |
| "grad_norm": 0.8515625, |
| "learning_rate": 4.0522388059701495e-05, |
| "loss": 0.5088, |
| "step": 255 |
| }, |
| { |
| "epoch": 1.9164319248826291, |
| "grad_norm": 1.3359375, |
| "learning_rate": 4.048507462686567e-05, |
| "loss": 0.5853, |
| "step": 256 |
| }, |
| { |
| "epoch": 1.923943661971831, |
| "grad_norm": 1.0859375, |
| "learning_rate": 4.044776119402985e-05, |
| "loss": 0.5691, |
| "step": 257 |
| }, |
| { |
| "epoch": 1.9314553990610328, |
| "grad_norm": 1.2890625, |
| "learning_rate": 4.041044776119403e-05, |
| "loss": 0.5251, |
| "step": 258 |
| }, |
| { |
| "epoch": 1.9389671361502347, |
| "grad_norm": 1.0390625, |
| "learning_rate": 4.037313432835821e-05, |
| "loss": 0.5245, |
| "step": 259 |
| }, |
| { |
| "epoch": 1.9464788732394367, |
| "grad_norm": 1.078125, |
| "learning_rate": 4.0335820895522394e-05, |
| "loss": 0.5217, |
| "step": 260 |
| }, |
| { |
| "epoch": 1.9539906103286384, |
| "grad_norm": 1.078125, |
| "learning_rate": 4.029850746268657e-05, |
| "loss": 0.4795, |
| "step": 261 |
| }, |
| { |
| "epoch": 1.9615023474178404, |
| "grad_norm": 1.109375, |
| "learning_rate": 4.0261194029850746e-05, |
| "loss": 0.5037, |
| "step": 262 |
| }, |
| { |
| "epoch": 1.9690140845070423, |
| "grad_norm": 1.0, |
| "learning_rate": 4.022388059701493e-05, |
| "loss": 0.5489, |
| "step": 263 |
| }, |
| { |
| "epoch": 1.976525821596244, |
| "grad_norm": 1.078125, |
| "learning_rate": 4.0186567164179104e-05, |
| "loss": 0.6005, |
| "step": 264 |
| }, |
| { |
| "epoch": 1.984037558685446, |
| "grad_norm": 0.96875, |
| "learning_rate": 4.014925373134329e-05, |
| "loss": 0.4849, |
| "step": 265 |
| }, |
| { |
| "epoch": 1.991549295774648, |
| "grad_norm": 1.1171875, |
| "learning_rate": 4.011194029850747e-05, |
| "loss": 0.541, |
| "step": 266 |
| }, |
| { |
| "epoch": 1.9990610328638496, |
| "grad_norm": 1.109375, |
| "learning_rate": 4.0074626865671645e-05, |
| "loss": 0.5955, |
| "step": 267 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 3.390625, |
| "learning_rate": 4.003731343283582e-05, |
| "loss": 0.4322, |
| "step": 268 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 1340, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 10, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.3712007927064166e+17, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|