| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 0, |
| "global_step": 247, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.004048582995951417, |
| "grad_norm": 0.396484375, |
| "learning_rate": 1e-05, |
| "loss": 1.7468, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.008097165991902834, |
| "grad_norm": 0.396484375, |
| "learning_rate": 9.959514170040487e-06, |
| "loss": 1.7975, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.012145748987854251, |
| "grad_norm": 0.388671875, |
| "learning_rate": 9.919028340080973e-06, |
| "loss": 1.7482, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.016194331983805668, |
| "grad_norm": 0.443359375, |
| "learning_rate": 9.878542510121458e-06, |
| "loss": 1.7389, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.020242914979757085, |
| "grad_norm": 0.36328125, |
| "learning_rate": 9.838056680161944e-06, |
| "loss": 1.719, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.024291497975708502, |
| "grad_norm": 0.3515625, |
| "learning_rate": 9.79757085020243e-06, |
| "loss": 1.7169, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.02834008097165992, |
| "grad_norm": 0.353515625, |
| "learning_rate": 9.757085020242916e-06, |
| "loss": 1.7607, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.032388663967611336, |
| "grad_norm": 0.330078125, |
| "learning_rate": 9.7165991902834e-06, |
| "loss": 1.7605, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.03643724696356275, |
| "grad_norm": 0.31640625, |
| "learning_rate": 9.676113360323888e-06, |
| "loss": 1.7096, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.04048582995951417, |
| "grad_norm": 0.27734375, |
| "learning_rate": 9.635627530364373e-06, |
| "loss": 1.7763, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.044534412955465584, |
| "grad_norm": 0.283203125, |
| "learning_rate": 9.595141700404859e-06, |
| "loss": 1.7735, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.048582995951417005, |
| "grad_norm": 0.2734375, |
| "learning_rate": 9.554655870445345e-06, |
| "loss": 1.6435, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.05263157894736842, |
| "grad_norm": 0.240234375, |
| "learning_rate": 9.514170040485831e-06, |
| "loss": 1.6785, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.05668016194331984, |
| "grad_norm": 0.255859375, |
| "learning_rate": 9.473684210526315e-06, |
| "loss": 1.6617, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.06072874493927125, |
| "grad_norm": 0.2373046875, |
| "learning_rate": 9.433198380566803e-06, |
| "loss": 1.6475, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.06477732793522267, |
| "grad_norm": 0.236328125, |
| "learning_rate": 9.392712550607288e-06, |
| "loss": 1.628, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.06882591093117409, |
| "grad_norm": 0.228515625, |
| "learning_rate": 9.352226720647774e-06, |
| "loss": 1.6464, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.0728744939271255, |
| "grad_norm": 0.23046875, |
| "learning_rate": 9.31174089068826e-06, |
| "loss": 1.6582, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.07692307692307693, |
| "grad_norm": 0.232421875, |
| "learning_rate": 9.271255060728746e-06, |
| "loss": 1.719, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.08097165991902834, |
| "grad_norm": 0.2138671875, |
| "learning_rate": 9.230769230769232e-06, |
| "loss": 1.5597, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.08502024291497975, |
| "grad_norm": 0.2451171875, |
| "learning_rate": 9.190283400809717e-06, |
| "loss": 1.5132, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.08906882591093117, |
| "grad_norm": 0.203125, |
| "learning_rate": 9.149797570850203e-06, |
| "loss": 1.564, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.0931174089068826, |
| "grad_norm": 0.1953125, |
| "learning_rate": 9.109311740890689e-06, |
| "loss": 1.5743, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.09716599190283401, |
| "grad_norm": 0.1943359375, |
| "learning_rate": 9.068825910931175e-06, |
| "loss": 1.5936, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.10121457489878542, |
| "grad_norm": 0.1787109375, |
| "learning_rate": 9.02834008097166e-06, |
| "loss": 1.6408, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.10526315789473684, |
| "grad_norm": 0.177734375, |
| "learning_rate": 8.987854251012147e-06, |
| "loss": 1.5942, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.10931174089068826, |
| "grad_norm": 0.2158203125, |
| "learning_rate": 8.947368421052632e-06, |
| "loss": 1.5207, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.11336032388663968, |
| "grad_norm": 0.16796875, |
| "learning_rate": 8.906882591093118e-06, |
| "loss": 1.5517, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.11740890688259109, |
| "grad_norm": 0.1708984375, |
| "learning_rate": 8.866396761133604e-06, |
| "loss": 1.6105, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.1214574898785425, |
| "grad_norm": 0.1904296875, |
| "learning_rate": 8.82591093117409e-06, |
| "loss": 1.633, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.12550607287449392, |
| "grad_norm": 0.16796875, |
| "learning_rate": 8.785425101214575e-06, |
| "loss": 1.517, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.12955465587044535, |
| "grad_norm": 0.1611328125, |
| "learning_rate": 8.744939271255063e-06, |
| "loss": 1.5337, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.13360323886639677, |
| "grad_norm": 0.169921875, |
| "learning_rate": 8.704453441295547e-06, |
| "loss": 1.5565, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.13765182186234817, |
| "grad_norm": 0.16015625, |
| "learning_rate": 8.663967611336033e-06, |
| "loss": 1.466, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.1417004048582996, |
| "grad_norm": 0.1669921875, |
| "learning_rate": 8.62348178137652e-06, |
| "loss": 1.5937, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.145748987854251, |
| "grad_norm": 0.1689453125, |
| "learning_rate": 8.582995951417005e-06, |
| "loss": 1.5475, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.14979757085020243, |
| "grad_norm": 0.294921875, |
| "learning_rate": 8.54251012145749e-06, |
| "loss": 1.499, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.15384615384615385, |
| "grad_norm": 0.1572265625, |
| "learning_rate": 8.502024291497976e-06, |
| "loss": 1.5268, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.15789473684210525, |
| "grad_norm": 0.1640625, |
| "learning_rate": 8.461538461538462e-06, |
| "loss": 1.6107, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.16194331983805668, |
| "grad_norm": 0.158203125, |
| "learning_rate": 8.421052631578948e-06, |
| "loss": 1.5444, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.1659919028340081, |
| "grad_norm": 0.15234375, |
| "learning_rate": 8.380566801619434e-06, |
| "loss": 1.4915, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.1700404858299595, |
| "grad_norm": 0.1650390625, |
| "learning_rate": 8.340080971659919e-06, |
| "loss": 1.4974, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.17408906882591094, |
| "grad_norm": 0.1904296875, |
| "learning_rate": 8.299595141700405e-06, |
| "loss": 1.4986, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.17813765182186234, |
| "grad_norm": 0.1591796875, |
| "learning_rate": 8.259109311740891e-06, |
| "loss": 1.4471, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.18218623481781376, |
| "grad_norm": 0.169921875, |
| "learning_rate": 8.218623481781377e-06, |
| "loss": 1.4408, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.1862348178137652, |
| "grad_norm": 0.1494140625, |
| "learning_rate": 8.178137651821862e-06, |
| "loss": 1.4718, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.1902834008097166, |
| "grad_norm": 0.1533203125, |
| "learning_rate": 8.13765182186235e-06, |
| "loss": 1.4133, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.19433198380566802, |
| "grad_norm": 0.1494140625, |
| "learning_rate": 8.097165991902834e-06, |
| "loss": 1.5276, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.19838056680161945, |
| "grad_norm": 0.15234375, |
| "learning_rate": 8.056680161943322e-06, |
| "loss": 1.4762, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.20242914979757085, |
| "grad_norm": 0.1435546875, |
| "learning_rate": 8.016194331983806e-06, |
| "loss": 1.4971, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.20647773279352227, |
| "grad_norm": 0.1533203125, |
| "learning_rate": 7.975708502024292e-06, |
| "loss": 1.4749, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.21052631578947367, |
| "grad_norm": 0.1552734375, |
| "learning_rate": 7.935222672064778e-06, |
| "loss": 1.4871, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.2145748987854251, |
| "grad_norm": 0.1416015625, |
| "learning_rate": 7.894736842105265e-06, |
| "loss": 1.4168, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.21862348178137653, |
| "grad_norm": 0.14453125, |
| "learning_rate": 7.854251012145749e-06, |
| "loss": 1.5185, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.22267206477732793, |
| "grad_norm": 0.146484375, |
| "learning_rate": 7.813765182186235e-06, |
| "loss": 1.4746, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.22672064777327935, |
| "grad_norm": 0.1396484375, |
| "learning_rate": 7.773279352226721e-06, |
| "loss": 1.5009, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.23076923076923078, |
| "grad_norm": 0.140625, |
| "learning_rate": 7.732793522267207e-06, |
| "loss": 1.4282, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.23481781376518218, |
| "grad_norm": 0.146484375, |
| "learning_rate": 7.692307692307694e-06, |
| "loss": 1.5008, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.2388663967611336, |
| "grad_norm": 0.14453125, |
| "learning_rate": 7.651821862348178e-06, |
| "loss": 1.461, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.242914979757085, |
| "grad_norm": 0.154296875, |
| "learning_rate": 7.611336032388664e-06, |
| "loss": 1.3517, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.24696356275303644, |
| "grad_norm": 0.1455078125, |
| "learning_rate": 7.570850202429151e-06, |
| "loss": 1.4487, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.25101214574898784, |
| "grad_norm": 0.1728515625, |
| "learning_rate": 7.5303643724696364e-06, |
| "loss": 1.4154, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.2550607287449393, |
| "grad_norm": 0.138671875, |
| "learning_rate": 7.489878542510122e-06, |
| "loss": 1.4261, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.2591093117408907, |
| "grad_norm": 0.1435546875, |
| "learning_rate": 7.449392712550608e-06, |
| "loss": 1.3977, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.2631578947368421, |
| "grad_norm": 0.14453125, |
| "learning_rate": 7.408906882591094e-06, |
| "loss": 1.3872, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.26720647773279355, |
| "grad_norm": 0.21484375, |
| "learning_rate": 7.368421052631579e-06, |
| "loss": 1.4218, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.27125506072874495, |
| "grad_norm": 0.15234375, |
| "learning_rate": 7.327935222672065e-06, |
| "loss": 1.4589, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.27530364372469635, |
| "grad_norm": 0.1455078125, |
| "learning_rate": 7.2874493927125516e-06, |
| "loss": 1.4256, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.2793522267206478, |
| "grad_norm": 0.134765625, |
| "learning_rate": 7.246963562753037e-06, |
| "loss": 1.4481, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.2834008097165992, |
| "grad_norm": 0.1591796875, |
| "learning_rate": 7.206477732793523e-06, |
| "loss": 1.3412, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.2874493927125506, |
| "grad_norm": 0.1396484375, |
| "learning_rate": 7.165991902834008e-06, |
| "loss": 1.4677, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.291497975708502, |
| "grad_norm": 0.1298828125, |
| "learning_rate": 7.125506072874494e-06, |
| "loss": 1.4729, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.29554655870445345, |
| "grad_norm": 0.1337890625, |
| "learning_rate": 7.0850202429149805e-06, |
| "loss": 1.4513, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.29959514170040485, |
| "grad_norm": 0.1396484375, |
| "learning_rate": 7.044534412955466e-06, |
| "loss": 1.3872, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.30364372469635625, |
| "grad_norm": 0.1328125, |
| "learning_rate": 7.004048582995951e-06, |
| "loss": 1.3805, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.3076923076923077, |
| "grad_norm": 0.1669921875, |
| "learning_rate": 6.963562753036438e-06, |
| "loss": 1.3883, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.3117408906882591, |
| "grad_norm": 0.2275390625, |
| "learning_rate": 6.923076923076923e-06, |
| "loss": 1.363, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.3157894736842105, |
| "grad_norm": 0.150390625, |
| "learning_rate": 6.882591093117409e-06, |
| "loss": 1.3539, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.31983805668016196, |
| "grad_norm": 0.162109375, |
| "learning_rate": 6.842105263157896e-06, |
| "loss": 1.5081, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.32388663967611336, |
| "grad_norm": 0.1416015625, |
| "learning_rate": 6.801619433198381e-06, |
| "loss": 1.4678, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.32793522267206476, |
| "grad_norm": 0.1396484375, |
| "learning_rate": 6.761133603238867e-06, |
| "loss": 1.4031, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.3319838056680162, |
| "grad_norm": 0.16796875, |
| "learning_rate": 6.720647773279353e-06, |
| "loss": 1.3495, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.3360323886639676, |
| "grad_norm": 0.1728515625, |
| "learning_rate": 6.6801619433198385e-06, |
| "loss": 1.4959, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.340080971659919, |
| "grad_norm": 0.138671875, |
| "learning_rate": 6.639676113360325e-06, |
| "loss": 1.4214, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.3441295546558704, |
| "grad_norm": 0.1357421875, |
| "learning_rate": 6.599190283400811e-06, |
| "loss": 1.3601, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.3481781376518219, |
| "grad_norm": 0.150390625, |
| "learning_rate": 6.558704453441296e-06, |
| "loss": 1.4616, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.3522267206477733, |
| "grad_norm": 0.169921875, |
| "learning_rate": 6.518218623481782e-06, |
| "loss": 1.3694, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.3562753036437247, |
| "grad_norm": 0.146484375, |
| "learning_rate": 6.4777327935222675e-06, |
| "loss": 1.3776, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.3603238866396761, |
| "grad_norm": 0.154296875, |
| "learning_rate": 6.437246963562754e-06, |
| "loss": 1.38, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.3643724696356275, |
| "grad_norm": 0.138671875, |
| "learning_rate": 6.39676113360324e-06, |
| "loss": 1.3726, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.3684210526315789, |
| "grad_norm": 0.1806640625, |
| "learning_rate": 6.356275303643725e-06, |
| "loss": 1.3127, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.3724696356275304, |
| "grad_norm": 0.142578125, |
| "learning_rate": 6.31578947368421e-06, |
| "loss": 1.4069, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.3765182186234818, |
| "grad_norm": 0.16015625, |
| "learning_rate": 6.275303643724697e-06, |
| "loss": 1.4103, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.3805668016194332, |
| "grad_norm": 0.1484375, |
| "learning_rate": 6.234817813765183e-06, |
| "loss": 1.3563, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.38461538461538464, |
| "grad_norm": 0.1572265625, |
| "learning_rate": 6.194331983805668e-06, |
| "loss": 1.4154, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.38866396761133604, |
| "grad_norm": 0.1572265625, |
| "learning_rate": 6.153846153846155e-06, |
| "loss": 1.4294, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.39271255060728744, |
| "grad_norm": 0.1484375, |
| "learning_rate": 6.11336032388664e-06, |
| "loss": 1.3781, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.3967611336032389, |
| "grad_norm": 0.1650390625, |
| "learning_rate": 6.0728744939271254e-06, |
| "loss": 1.3883, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.4008097165991903, |
| "grad_norm": 0.1533203125, |
| "learning_rate": 6.0323886639676124e-06, |
| "loss": 1.3806, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.4048582995951417, |
| "grad_norm": 0.146484375, |
| "learning_rate": 5.991902834008098e-06, |
| "loss": 1.3858, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.4089068825910931, |
| "grad_norm": 0.166015625, |
| "learning_rate": 5.951417004048583e-06, |
| "loss": 1.3581, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.41295546558704455, |
| "grad_norm": 0.16015625, |
| "learning_rate": 5.91093117408907e-06, |
| "loss": 1.4532, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.41700404858299595, |
| "grad_norm": 0.1416015625, |
| "learning_rate": 5.870445344129555e-06, |
| "loss": 1.3921, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.42105263157894735, |
| "grad_norm": 0.1396484375, |
| "learning_rate": 5.8299595141700406e-06, |
| "loss": 1.3722, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.4251012145748988, |
| "grad_norm": 0.15625, |
| "learning_rate": 5.789473684210527e-06, |
| "loss": 1.342, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.4291497975708502, |
| "grad_norm": 0.1337890625, |
| "learning_rate": 5.748987854251013e-06, |
| "loss": 1.3459, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.4331983805668016, |
| "grad_norm": 0.146484375, |
| "learning_rate": 5.708502024291498e-06, |
| "loss": 1.3981, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.43724696356275305, |
| "grad_norm": 0.14453125, |
| "learning_rate": 5.668016194331984e-06, |
| "loss": 1.3834, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.44129554655870445, |
| "grad_norm": 0.1689453125, |
| "learning_rate": 5.6275303643724695e-06, |
| "loss": 1.3381, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.44534412955465585, |
| "grad_norm": 0.15234375, |
| "learning_rate": 5.5870445344129565e-06, |
| "loss": 1.3009, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.4493927125506073, |
| "grad_norm": 0.15625, |
| "learning_rate": 5.546558704453442e-06, |
| "loss": 1.3596, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.4534412955465587, |
| "grad_norm": 0.1357421875, |
| "learning_rate": 5.506072874493927e-06, |
| "loss": 1.3557, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.4574898785425101, |
| "grad_norm": 0.1484375, |
| "learning_rate": 5.465587044534414e-06, |
| "loss": 1.3832, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.46153846153846156, |
| "grad_norm": 0.138671875, |
| "learning_rate": 5.425101214574899e-06, |
| "loss": 1.3225, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.46558704453441296, |
| "grad_norm": 0.1708984375, |
| "learning_rate": 5.384615384615385e-06, |
| "loss": 1.3469, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.46963562753036436, |
| "grad_norm": 0.154296875, |
| "learning_rate": 5.344129554655872e-06, |
| "loss": 1.4027, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.47368421052631576, |
| "grad_norm": 0.1533203125, |
| "learning_rate": 5.303643724696357e-06, |
| "loss": 1.3497, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.4777327935222672, |
| "grad_norm": 0.1572265625, |
| "learning_rate": 5.263157894736842e-06, |
| "loss": 1.3458, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.4817813765182186, |
| "grad_norm": 0.1865234375, |
| "learning_rate": 5.222672064777329e-06, |
| "loss": 1.2949, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.48582995951417, |
| "grad_norm": 0.140625, |
| "learning_rate": 5.1821862348178145e-06, |
| "loss": 1.3905, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.4898785425101215, |
| "grad_norm": 0.1484375, |
| "learning_rate": 5.1417004048583e-06, |
| "loss": 1.4107, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.4939271255060729, |
| "grad_norm": 0.1640625, |
| "learning_rate": 5.101214574898786e-06, |
| "loss": 1.3685, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.4979757085020243, |
| "grad_norm": 0.15625, |
| "learning_rate": 5.060728744939272e-06, |
| "loss": 1.3311, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.5020242914979757, |
| "grad_norm": 0.2314453125, |
| "learning_rate": 5.020242914979757e-06, |
| "loss": 1.3534, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.5060728744939271, |
| "grad_norm": 0.154296875, |
| "learning_rate": 4.9797570850202435e-06, |
| "loss": 1.4142, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.5101214574898786, |
| "grad_norm": 0.1552734375, |
| "learning_rate": 4.939271255060729e-06, |
| "loss": 1.2787, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.5141700404858299, |
| "grad_norm": 0.14453125, |
| "learning_rate": 4.898785425101215e-06, |
| "loss": 1.322, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.5182186234817814, |
| "grad_norm": 1.765625, |
| "learning_rate": 4.8582995951417e-06, |
| "loss": 1.397, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.5222672064777328, |
| "grad_norm": 0.1953125, |
| "learning_rate": 4.817813765182186e-06, |
| "loss": 1.4061, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.5263157894736842, |
| "grad_norm": 0.150390625, |
| "learning_rate": 4.7773279352226725e-06, |
| "loss": 1.3198, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.5303643724696356, |
| "grad_norm": 0.15625, |
| "learning_rate": 4.736842105263158e-06, |
| "loss": 1.4233, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.5344129554655871, |
| "grad_norm": 0.1650390625, |
| "learning_rate": 4.696356275303644e-06, |
| "loss": 1.2438, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.5384615384615384, |
| "grad_norm": 0.44140625, |
| "learning_rate": 4.65587044534413e-06, |
| "loss": 1.2728, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.5425101214574899, |
| "grad_norm": 0.1533203125, |
| "learning_rate": 4.615384615384616e-06, |
| "loss": 1.4425, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.5465587044534413, |
| "grad_norm": 0.1533203125, |
| "learning_rate": 4.5748987854251014e-06, |
| "loss": 1.4055, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.5506072874493927, |
| "grad_norm": 0.1572265625, |
| "learning_rate": 4.534412955465588e-06, |
| "loss": 1.2969, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.5546558704453441, |
| "grad_norm": 0.1484375, |
| "learning_rate": 4.493927125506074e-06, |
| "loss": 1.3394, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.5587044534412956, |
| "grad_norm": 0.169921875, |
| "learning_rate": 4.453441295546559e-06, |
| "loss": 1.2734, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.562753036437247, |
| "grad_norm": 0.2099609375, |
| "learning_rate": 4.412955465587045e-06, |
| "loss": 1.2709, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.5668016194331984, |
| "grad_norm": 0.1650390625, |
| "learning_rate": 4.372469635627531e-06, |
| "loss": 1.4363, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.5708502024291497, |
| "grad_norm": 0.1533203125, |
| "learning_rate": 4.3319838056680166e-06, |
| "loss": 1.2841, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.5748987854251012, |
| "grad_norm": 0.1845703125, |
| "learning_rate": 4.291497975708503e-06, |
| "loss": 1.4294, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.5789473684210527, |
| "grad_norm": 0.1611328125, |
| "learning_rate": 4.251012145748988e-06, |
| "loss": 1.2398, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.582995951417004, |
| "grad_norm": 0.146484375, |
| "learning_rate": 4.210526315789474e-06, |
| "loss": 1.2878, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.5870445344129555, |
| "grad_norm": 0.5078125, |
| "learning_rate": 4.170040485829959e-06, |
| "loss": 1.3302, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.5910931174089069, |
| "grad_norm": 0.158203125, |
| "learning_rate": 4.1295546558704455e-06, |
| "loss": 1.3835, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.5951417004048583, |
| "grad_norm": 0.16796875, |
| "learning_rate": 4.089068825910931e-06, |
| "loss": 1.4713, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.5991902834008097, |
| "grad_norm": 0.1435546875, |
| "learning_rate": 4.048582995951417e-06, |
| "loss": 1.3315, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.6032388663967612, |
| "grad_norm": 0.150390625, |
| "learning_rate": 4.008097165991903e-06, |
| "loss": 1.3158, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.6072874493927125, |
| "grad_norm": 0.1669921875, |
| "learning_rate": 3.967611336032389e-06, |
| "loss": 1.3414, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.611336032388664, |
| "grad_norm": 0.1552734375, |
| "learning_rate": 3.9271255060728745e-06, |
| "loss": 1.4022, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.6153846153846154, |
| "grad_norm": 0.1865234375, |
| "learning_rate": 3.886639676113361e-06, |
| "loss": 1.3256, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.6194331983805668, |
| "grad_norm": 0.14453125, |
| "learning_rate": 3.846153846153847e-06, |
| "loss": 1.3395, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.6234817813765182, |
| "grad_norm": 0.146484375, |
| "learning_rate": 3.805668016194332e-06, |
| "loss": 1.301, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.6275303643724697, |
| "grad_norm": 0.142578125, |
| "learning_rate": 3.7651821862348182e-06, |
| "loss": 1.3438, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.631578947368421, |
| "grad_norm": 0.1806640625, |
| "learning_rate": 3.724696356275304e-06, |
| "loss": 1.2879, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.6356275303643725, |
| "grad_norm": 0.1845703125, |
| "learning_rate": 3.6842105263157896e-06, |
| "loss": 1.4294, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.6396761133603239, |
| "grad_norm": 0.1640625, |
| "learning_rate": 3.6437246963562758e-06, |
| "loss": 1.4076, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.6437246963562753, |
| "grad_norm": 0.15625, |
| "learning_rate": 3.6032388663967615e-06, |
| "loss": 1.3888, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.6477732793522267, |
| "grad_norm": 0.1572265625, |
| "learning_rate": 3.562753036437247e-06, |
| "loss": 1.2494, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.6518218623481782, |
| "grad_norm": 0.1591796875, |
| "learning_rate": 3.522267206477733e-06, |
| "loss": 1.3674, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.6558704453441295, |
| "grad_norm": 0.1943359375, |
| "learning_rate": 3.481781376518219e-06, |
| "loss": 1.2075, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.659919028340081, |
| "grad_norm": 0.1494140625, |
| "learning_rate": 3.4412955465587043e-06, |
| "loss": 1.3506, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.6639676113360324, |
| "grad_norm": 0.193359375, |
| "learning_rate": 3.4008097165991905e-06, |
| "loss": 1.3442, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.6680161943319838, |
| "grad_norm": 0.1494140625, |
| "learning_rate": 3.3603238866396766e-06, |
| "loss": 1.2816, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.6720647773279352, |
| "grad_norm": 0.171875, |
| "learning_rate": 3.3198380566801623e-06, |
| "loss": 1.3368, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.6761133603238867, |
| "grad_norm": 0.1611328125, |
| "learning_rate": 3.279352226720648e-06, |
| "loss": 1.366, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.680161943319838, |
| "grad_norm": 0.1591796875, |
| "learning_rate": 3.2388663967611337e-06, |
| "loss": 1.3298, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.6842105263157895, |
| "grad_norm": 0.1572265625, |
| "learning_rate": 3.19838056680162e-06, |
| "loss": 1.3258, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.6882591093117408, |
| "grad_norm": 0.14453125, |
| "learning_rate": 3.157894736842105e-06, |
| "loss": 1.312, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.6923076923076923, |
| "grad_norm": 0.2216796875, |
| "learning_rate": 3.1174089068825913e-06, |
| "loss": 1.3936, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.6963562753036437, |
| "grad_norm": 0.1552734375, |
| "learning_rate": 3.0769230769230774e-06, |
| "loss": 1.35, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.7004048582995951, |
| "grad_norm": 0.1611328125, |
| "learning_rate": 3.0364372469635627e-06, |
| "loss": 1.3484, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.7044534412955465, |
| "grad_norm": 0.1708984375, |
| "learning_rate": 2.995951417004049e-06, |
| "loss": 1.3093, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.708502024291498, |
| "grad_norm": 0.1689453125, |
| "learning_rate": 2.955465587044535e-06, |
| "loss": 1.3015, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.7125506072874493, |
| "grad_norm": 0.1484375, |
| "learning_rate": 2.9149797570850203e-06, |
| "loss": 1.3365, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.7165991902834008, |
| "grad_norm": 0.1533203125, |
| "learning_rate": 2.8744939271255064e-06, |
| "loss": 1.3336, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.7206477732793523, |
| "grad_norm": 0.14453125, |
| "learning_rate": 2.834008097165992e-06, |
| "loss": 1.2927, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.7246963562753036, |
| "grad_norm": 0.16796875, |
| "learning_rate": 2.7935222672064783e-06, |
| "loss": 1.29, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.728744939271255, |
| "grad_norm": 0.177734375, |
| "learning_rate": 2.7530364372469636e-06, |
| "loss": 1.2423, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.7327935222672065, |
| "grad_norm": 0.1611328125, |
| "learning_rate": 2.7125506072874497e-06, |
| "loss": 1.2913, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.7368421052631579, |
| "grad_norm": 0.162109375, |
| "learning_rate": 2.672064777327936e-06, |
| "loss": 1.3249, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.7408906882591093, |
| "grad_norm": 0.158203125, |
| "learning_rate": 2.631578947368421e-06, |
| "loss": 1.2356, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.7449392712550608, |
| "grad_norm": 0.154296875, |
| "learning_rate": 2.5910931174089072e-06, |
| "loss": 1.2955, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.7489878542510121, |
| "grad_norm": 0.1552734375, |
| "learning_rate": 2.550607287449393e-06, |
| "loss": 1.3227, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.7530364372469636, |
| "grad_norm": 0.1611328125, |
| "learning_rate": 2.5101214574898787e-06, |
| "loss": 1.2789, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.757085020242915, |
| "grad_norm": 0.1591796875, |
| "learning_rate": 2.4696356275303644e-06, |
| "loss": 1.2742, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.7611336032388664, |
| "grad_norm": 0.1474609375, |
| "learning_rate": 2.42914979757085e-06, |
| "loss": 1.3393, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.7651821862348178, |
| "grad_norm": 0.1513671875, |
| "learning_rate": 2.3886639676113362e-06, |
| "loss": 1.3122, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.7692307692307693, |
| "grad_norm": 0.1494140625, |
| "learning_rate": 2.348178137651822e-06, |
| "loss": 1.3016, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.7732793522267206, |
| "grad_norm": 0.1494140625, |
| "learning_rate": 2.307692307692308e-06, |
| "loss": 1.2886, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.7773279352226721, |
| "grad_norm": 0.177734375, |
| "learning_rate": 2.267206477732794e-06, |
| "loss": 1.4194, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.7813765182186235, |
| "grad_norm": 0.1640625, |
| "learning_rate": 2.2267206477732795e-06, |
| "loss": 1.244, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.7854251012145749, |
| "grad_norm": 0.17578125, |
| "learning_rate": 2.1862348178137656e-06, |
| "loss": 1.2748, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.7894736842105263, |
| "grad_norm": 0.1669921875, |
| "learning_rate": 2.1457489878542513e-06, |
| "loss": 1.3047, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.7935222672064778, |
| "grad_norm": 0.181640625, |
| "learning_rate": 2.105263157894737e-06, |
| "loss": 1.3679, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.7975708502024291, |
| "grad_norm": 0.16015625, |
| "learning_rate": 2.0647773279352228e-06, |
| "loss": 1.2566, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.8016194331983806, |
| "grad_norm": 0.1455078125, |
| "learning_rate": 2.0242914979757085e-06, |
| "loss": 1.3056, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.805668016194332, |
| "grad_norm": 0.1650390625, |
| "learning_rate": 1.9838056680161946e-06, |
| "loss": 1.32, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.8097165991902834, |
| "grad_norm": 0.1572265625, |
| "learning_rate": 1.9433198380566803e-06, |
| "loss": 1.2582, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.8137651821862348, |
| "grad_norm": 0.220703125, |
| "learning_rate": 1.902834008097166e-06, |
| "loss": 1.357, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.8178137651821862, |
| "grad_norm": 0.154296875, |
| "learning_rate": 1.862348178137652e-06, |
| "loss": 1.3521, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.8218623481781376, |
| "grad_norm": 0.1669921875, |
| "learning_rate": 1.8218623481781379e-06, |
| "loss": 1.3194, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.8259109311740891, |
| "grad_norm": 0.1630859375, |
| "learning_rate": 1.7813765182186236e-06, |
| "loss": 1.3109, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.8299595141700404, |
| "grad_norm": 0.169921875, |
| "learning_rate": 1.7408906882591095e-06, |
| "loss": 1.319, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.8340080971659919, |
| "grad_norm": 0.1572265625, |
| "learning_rate": 1.7004048582995952e-06, |
| "loss": 1.2524, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.8380566801619433, |
| "grad_norm": 0.171875, |
| "learning_rate": 1.6599190283400812e-06, |
| "loss": 1.2506, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.8421052631578947, |
| "grad_norm": 0.162109375, |
| "learning_rate": 1.6194331983805669e-06, |
| "loss": 1.2782, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.8461538461538461, |
| "grad_norm": 0.1513671875, |
| "learning_rate": 1.5789473684210526e-06, |
| "loss": 1.338, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.8502024291497976, |
| "grad_norm": 0.1611328125, |
| "learning_rate": 1.5384615384615387e-06, |
| "loss": 1.3054, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.854251012145749, |
| "grad_norm": 0.1650390625, |
| "learning_rate": 1.4979757085020244e-06, |
| "loss": 1.2981, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.8582995951417004, |
| "grad_norm": 0.3359375, |
| "learning_rate": 1.4574898785425101e-06, |
| "loss": 1.2384, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.8623481781376519, |
| "grad_norm": 0.1630859375, |
| "learning_rate": 1.417004048582996e-06, |
| "loss": 1.223, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.8663967611336032, |
| "grad_norm": 0.1640625, |
| "learning_rate": 1.3765182186234818e-06, |
| "loss": 1.2893, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.8704453441295547, |
| "grad_norm": 0.1787109375, |
| "learning_rate": 1.336032388663968e-06, |
| "loss": 1.3688, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.8744939271255061, |
| "grad_norm": 0.2373046875, |
| "learning_rate": 1.2955465587044536e-06, |
| "loss": 1.2511, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.8785425101214575, |
| "grad_norm": 0.1650390625, |
| "learning_rate": 1.2550607287449393e-06, |
| "loss": 1.2745, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.8825910931174089, |
| "grad_norm": 0.1630859375, |
| "learning_rate": 1.214574898785425e-06, |
| "loss": 1.3063, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.8866396761133604, |
| "grad_norm": 0.158203125, |
| "learning_rate": 1.174089068825911e-06, |
| "loss": 1.2903, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.8906882591093117, |
| "grad_norm": 0.150390625, |
| "learning_rate": 1.133603238866397e-06, |
| "loss": 1.3452, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.8947368421052632, |
| "grad_norm": 0.1982421875, |
| "learning_rate": 1.0931174089068828e-06, |
| "loss": 1.4376, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.8987854251012146, |
| "grad_norm": 0.1982421875, |
| "learning_rate": 1.0526315789473685e-06, |
| "loss": 1.4119, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.902834008097166, |
| "grad_norm": 0.173828125, |
| "learning_rate": 1.0121457489878542e-06, |
| "loss": 1.3001, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.9068825910931174, |
| "grad_norm": 0.1572265625, |
| "learning_rate": 9.716599190283402e-07, |
| "loss": 1.3085, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.9109311740890689, |
| "grad_norm": 0.162109375, |
| "learning_rate": 9.31174089068826e-07, |
| "loss": 1.2873, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.9149797570850202, |
| "grad_norm": 0.19140625, |
| "learning_rate": 8.906882591093118e-07, |
| "loss": 1.2741, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.9190283400809717, |
| "grad_norm": 0.1611328125, |
| "learning_rate": 8.502024291497976e-07, |
| "loss": 1.3929, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.9230769230769231, |
| "grad_norm": 0.1962890625, |
| "learning_rate": 8.097165991902834e-07, |
| "loss": 1.2992, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.9271255060728745, |
| "grad_norm": 0.1845703125, |
| "learning_rate": 7.692307692307694e-07, |
| "loss": 1.268, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.9311740890688259, |
| "grad_norm": 0.2451171875, |
| "learning_rate": 7.287449392712551e-07, |
| "loss": 1.3517, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.9352226720647774, |
| "grad_norm": 0.154296875, |
| "learning_rate": 6.882591093117409e-07, |
| "loss": 1.2013, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.9392712550607287, |
| "grad_norm": 0.158203125, |
| "learning_rate": 6.477732793522268e-07, |
| "loss": 1.3459, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.9433198380566802, |
| "grad_norm": 0.1728515625, |
| "learning_rate": 6.072874493927125e-07, |
| "loss": 1.3494, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.9473684210526315, |
| "grad_norm": 0.1640625, |
| "learning_rate": 5.668016194331984e-07, |
| "loss": 1.2346, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.951417004048583, |
| "grad_norm": 0.16015625, |
| "learning_rate": 5.263157894736843e-07, |
| "loss": 1.2858, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.9554655870445344, |
| "grad_norm": 0.1962890625, |
| "learning_rate": 4.858299595141701e-07, |
| "loss": 1.2212, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.9595141700404858, |
| "grad_norm": 0.16796875, |
| "learning_rate": 4.453441295546559e-07, |
| "loss": 1.3566, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.9635627530364372, |
| "grad_norm": 0.15234375, |
| "learning_rate": 4.048582995951417e-07, |
| "loss": 1.374, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.9676113360323887, |
| "grad_norm": 0.1669921875, |
| "learning_rate": 3.6437246963562754e-07, |
| "loss": 1.2331, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.97165991902834, |
| "grad_norm": 0.1591796875, |
| "learning_rate": 3.238866396761134e-07, |
| "loss": 1.3375, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.9757085020242915, |
| "grad_norm": 0.1748046875, |
| "learning_rate": 2.834008097165992e-07, |
| "loss": 1.2493, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.979757085020243, |
| "grad_norm": 0.1845703125, |
| "learning_rate": 2.4291497975708504e-07, |
| "loss": 1.378, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.9838056680161943, |
| "grad_norm": 0.16015625, |
| "learning_rate": 2.0242914979757086e-07, |
| "loss": 1.3907, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.9878542510121457, |
| "grad_norm": 0.1533203125, |
| "learning_rate": 1.619433198380567e-07, |
| "loss": 1.3215, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.9919028340080972, |
| "grad_norm": 0.169921875, |
| "learning_rate": 1.2145748987854252e-07, |
| "loss": 1.3432, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.9959514170040485, |
| "grad_norm": 0.1875, |
| "learning_rate": 8.097165991902835e-08, |
| "loss": 1.3032, |
| "step": 246 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.158203125, |
| "learning_rate": 4.0485829959514176e-08, |
| "loss": 1.2984, |
| "step": 247 |
| } |
| ], |
| "logging_steps": 1.0, |
| "max_steps": 247, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 0, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 7.717806075910554e+17, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|