| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.3716608594657375, |
| "eval_steps": 500, |
| "global_step": 80, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.004645760743321719, |
| "grad_norm": 5.007833918054177, |
| "learning_rate": 0.0, |
| "loss": 0.3559, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.009291521486643438, |
| "grad_norm": 6.095402413693695, |
| "learning_rate": 2.3255813953488374e-07, |
| "loss": 0.4478, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.013937282229965157, |
| "grad_norm": 5.711860921252622, |
| "learning_rate": 4.651162790697675e-07, |
| "loss": 0.4202, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.018583042973286876, |
| "grad_norm": 4.919598888513217, |
| "learning_rate": 6.976744186046513e-07, |
| "loss": 0.3667, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.023228803716608595, |
| "grad_norm": 5.323101120637421, |
| "learning_rate": 9.30232558139535e-07, |
| "loss": 0.392, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.027874564459930314, |
| "grad_norm": 4.825973626019842, |
| "learning_rate": 1.1627906976744188e-06, |
| "loss": 0.3745, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.032520325203252036, |
| "grad_norm": 4.722030575751275, |
| "learning_rate": 1.3953488372093025e-06, |
| "loss": 0.372, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.03716608594657375, |
| "grad_norm": 3.204008987512321, |
| "learning_rate": 1.6279069767441862e-06, |
| "loss": 0.3266, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.041811846689895474, |
| "grad_norm": 3.462779060517298, |
| "learning_rate": 1.86046511627907e-06, |
| "loss": 0.3531, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.04645760743321719, |
| "grad_norm": 2.2912708740881493, |
| "learning_rate": 2.0930232558139536e-06, |
| "loss": 0.2766, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.05110336817653891, |
| "grad_norm": 2.360020281101277, |
| "learning_rate": 2.3255813953488376e-06, |
| "loss": 0.2708, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.05574912891986063, |
| "grad_norm": 2.210016095359394, |
| "learning_rate": 2.558139534883721e-06, |
| "loss": 0.2772, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.06039488966318235, |
| "grad_norm": 2.0430891492298042, |
| "learning_rate": 2.790697674418605e-06, |
| "loss": 0.2116, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.06504065040650407, |
| "grad_norm": 1.1937018297310904, |
| "learning_rate": 3.0232558139534885e-06, |
| "loss": 0.1785, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.06968641114982578, |
| "grad_norm": 2.293421959275872, |
| "learning_rate": 3.2558139534883724e-06, |
| "loss": 0.162, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.0743321718931475, |
| "grad_norm": 2.491793718040822, |
| "learning_rate": 3.4883720930232564e-06, |
| "loss": 0.1625, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.07897793263646923, |
| "grad_norm": 1.0576667987718384, |
| "learning_rate": 3.72093023255814e-06, |
| "loss": 0.2098, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.08362369337979095, |
| "grad_norm": 1.3362976538502087, |
| "learning_rate": 3.953488372093024e-06, |
| "loss": 0.2079, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.08826945412311266, |
| "grad_norm": 0.8210443873224946, |
| "learning_rate": 4.186046511627907e-06, |
| "loss": 0.1393, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.09291521486643438, |
| "grad_norm": 1.3763061922656377, |
| "learning_rate": 4.418604651162791e-06, |
| "loss": 0.1836, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.0975609756097561, |
| "grad_norm": 0.8540487861144667, |
| "learning_rate": 4.651162790697675e-06, |
| "loss": 0.1726, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.10220673635307782, |
| "grad_norm": 0.9965597925413834, |
| "learning_rate": 4.883720930232559e-06, |
| "loss": 0.1594, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.10685249709639953, |
| "grad_norm": 0.8124932586870112, |
| "learning_rate": 5.116279069767442e-06, |
| "loss": 0.1645, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.11149825783972125, |
| "grad_norm": 0.765248257044288, |
| "learning_rate": 5.348837209302326e-06, |
| "loss": 0.1689, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.11614401858304298, |
| "grad_norm": 0.6535802502844509, |
| "learning_rate": 5.58139534883721e-06, |
| "loss": 0.1221, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.1207897793263647, |
| "grad_norm": 0.9332551865412465, |
| "learning_rate": 5.8139534883720935e-06, |
| "loss": 0.1441, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.1254355400696864, |
| "grad_norm": 0.6281977004298958, |
| "learning_rate": 6.046511627906977e-06, |
| "loss": 0.145, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.13008130081300814, |
| "grad_norm": 0.6506862696473351, |
| "learning_rate": 6.279069767441861e-06, |
| "loss": 0.143, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.13472706155632985, |
| "grad_norm": 0.5925406859408308, |
| "learning_rate": 6.511627906976745e-06, |
| "loss": 0.114, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.13937282229965156, |
| "grad_norm": 0.6169180678569642, |
| "learning_rate": 6.744186046511628e-06, |
| "loss": 0.1468, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.1440185830429733, |
| "grad_norm": 0.6614089477540361, |
| "learning_rate": 6.976744186046513e-06, |
| "loss": 0.1499, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.148664343786295, |
| "grad_norm": 0.5308532041753055, |
| "learning_rate": 7.209302325581395e-06, |
| "loss": 0.1215, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.15331010452961671, |
| "grad_norm": 0.5280217992451164, |
| "learning_rate": 7.44186046511628e-06, |
| "loss": 0.1152, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.15795586527293845, |
| "grad_norm": 0.5648689606705951, |
| "learning_rate": 7.674418604651164e-06, |
| "loss": 0.1272, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.16260162601626016, |
| "grad_norm": 0.6245478572825075, |
| "learning_rate": 7.906976744186048e-06, |
| "loss": 0.1239, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.1672473867595819, |
| "grad_norm": 0.5730808630246532, |
| "learning_rate": 8.139534883720931e-06, |
| "loss": 0.1471, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.1718931475029036, |
| "grad_norm": 0.8015364328717397, |
| "learning_rate": 8.372093023255815e-06, |
| "loss": 0.1294, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.1765389082462253, |
| "grad_norm": 0.5068324361347291, |
| "learning_rate": 8.604651162790698e-06, |
| "loss": 0.1143, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.18118466898954705, |
| "grad_norm": 0.43809350886606024, |
| "learning_rate": 8.837209302325582e-06, |
| "loss": 0.1036, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.18583042973286876, |
| "grad_norm": 0.6474622607168083, |
| "learning_rate": 9.069767441860465e-06, |
| "loss": 0.1293, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.19047619047619047, |
| "grad_norm": 0.398228005633473, |
| "learning_rate": 9.30232558139535e-06, |
| "loss": 0.0795, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.1951219512195122, |
| "grad_norm": 0.5722673746771715, |
| "learning_rate": 9.534883720930234e-06, |
| "loss": 0.1195, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.1997677119628339, |
| "grad_norm": 0.6387829720867708, |
| "learning_rate": 9.767441860465117e-06, |
| "loss": 0.0944, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.20441347270615565, |
| "grad_norm": 0.7262386990022901, |
| "learning_rate": 1e-05, |
| "loss": 0.1238, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.20905923344947736, |
| "grad_norm": 0.6415131233839582, |
| "learning_rate": 9.999835253787472e-06, |
| "loss": 0.0962, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.21370499419279906, |
| "grad_norm": 0.49038910043998885, |
| "learning_rate": 9.99934102600642e-06, |
| "loss": 0.1138, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.2183507549361208, |
| "grad_norm": 0.4998657774910992, |
| "learning_rate": 9.998517349225698e-06, |
| "loss": 0.1167, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.2229965156794425, |
| "grad_norm": 0.5526423671759817, |
| "learning_rate": 9.997364277724362e-06, |
| "loss": 0.1139, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.22764227642276422, |
| "grad_norm": 0.6758176165629011, |
| "learning_rate": 9.99588188748808e-06, |
| "loss": 0.1228, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.23228803716608595, |
| "grad_norm": 0.4301604798832004, |
| "learning_rate": 9.994070276204115e-06, |
| "loss": 0.1064, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.23693379790940766, |
| "grad_norm": 0.46902072966952235, |
| "learning_rate": 9.991929563254913e-06, |
| "loss": 0.1108, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.2415795586527294, |
| "grad_norm": 0.4794902114805145, |
| "learning_rate": 9.989459889710214e-06, |
| "loss": 0.1186, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.2462253193960511, |
| "grad_norm": 0.5285811649673939, |
| "learning_rate": 9.986661418317759e-06, |
| "loss": 0.1122, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.2508710801393728, |
| "grad_norm": 0.47633553231292913, |
| "learning_rate": 9.983534333492575e-06, |
| "loss": 0.1082, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.25551684088269455, |
| "grad_norm": 0.44801729878550234, |
| "learning_rate": 9.980078841304817e-06, |
| "loss": 0.099, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.2601626016260163, |
| "grad_norm": 0.5171140926967782, |
| "learning_rate": 9.97629516946618e-06, |
| "loss": 0.1325, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.26480836236933797, |
| "grad_norm": 0.5256667140476758, |
| "learning_rate": 9.97218356731491e-06, |
| "loss": 0.1182, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.2694541231126597, |
| "grad_norm": 0.5021698137895357, |
| "learning_rate": 9.967744305799358e-06, |
| "loss": 0.116, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.27409988385598144, |
| "grad_norm": 0.4524730076882164, |
| "learning_rate": 9.962977677460132e-06, |
| "loss": 0.105, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.2787456445993031, |
| "grad_norm": 0.4865312647490816, |
| "learning_rate": 9.957883996410821e-06, |
| "loss": 0.0996, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.28339140534262486, |
| "grad_norm": 0.6154932122623457, |
| "learning_rate": 9.952463598317286e-06, |
| "loss": 0.1032, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.2880371660859466, |
| "grad_norm": 0.5720328273849398, |
| "learning_rate": 9.946716840375552e-06, |
| "loss": 0.1235, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.2926829268292683, |
| "grad_norm": 0.5353383236460627, |
| "learning_rate": 9.940644101288259e-06, |
| "loss": 0.1319, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.29732868757259, |
| "grad_norm": 0.5699498931307571, |
| "learning_rate": 9.934245781239714e-06, |
| "loss": 0.1222, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.30197444831591175, |
| "grad_norm": 0.5041655852014973, |
| "learning_rate": 9.927522301869515e-06, |
| "loss": 0.1185, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.30662020905923343, |
| "grad_norm": 0.560593382940921, |
| "learning_rate": 9.920474106244764e-06, |
| "loss": 0.1258, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.31126596980255516, |
| "grad_norm": 0.47113303080753793, |
| "learning_rate": 9.913101658830879e-06, |
| "loss": 0.0992, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.3159117305458769, |
| "grad_norm": 0.4949311762244736, |
| "learning_rate": 9.905405445460972e-06, |
| "loss": 0.1086, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.3205574912891986, |
| "grad_norm": 0.5118809629828459, |
| "learning_rate": 9.897385973303845e-06, |
| "loss": 0.1115, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.3252032520325203, |
| "grad_norm": 0.45743773717890135, |
| "learning_rate": 9.889043770830566e-06, |
| "loss": 0.0981, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.32984901277584205, |
| "grad_norm": 0.41235204595203667, |
| "learning_rate": 9.880379387779637e-06, |
| "loss": 0.0966, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.3344947735191638, |
| "grad_norm": 0.4726483178511245, |
| "learning_rate": 9.871393395120774e-06, |
| "loss": 0.0956, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.33914053426248547, |
| "grad_norm": 0.4601067106108247, |
| "learning_rate": 9.862086385017283e-06, |
| "loss": 0.0988, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.3437862950058072, |
| "grad_norm": 0.4731203086011778, |
| "learning_rate": 9.852458970787027e-06, |
| "loss": 0.1149, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.34843205574912894, |
| "grad_norm": 0.5764782890777245, |
| "learning_rate": 9.842511786862018e-06, |
| "loss": 0.1327, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.3530778164924506, |
| "grad_norm": 0.48832588953944156, |
| "learning_rate": 9.832245488746612e-06, |
| "loss": 0.1019, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.35772357723577236, |
| "grad_norm": 0.44229595011423917, |
| "learning_rate": 9.821660752974294e-06, |
| "loss": 0.1067, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.3623693379790941, |
| "grad_norm": 0.4796942954547918, |
| "learning_rate": 9.81075827706312e-06, |
| "loss": 0.0825, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.3670150987224158, |
| "grad_norm": 0.40318849008254065, |
| "learning_rate": 9.799538779469734e-06, |
| "loss": 0.0998, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.3716608594657375, |
| "grad_norm": 0.5193073119324224, |
| "learning_rate": 9.78800299954203e-06, |
| "loss": 0.0857, |
| "step": 80 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 430, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 20, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2.063277885531095e+17, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|