| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 500, |
| "global_step": 5641, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.01772735330615139, |
| "grad_norm": 0.04697391018271446, |
| "learning_rate": 4.432624113475178e-07, |
| "loss": 2.8098, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.03545470661230278, |
| "grad_norm": 0.0493495836853981, |
| "learning_rate": 8.865248226950356e-07, |
| "loss": 2.8311, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.05318205991845418, |
| "grad_norm": 0.05300679802894592, |
| "learning_rate": 1.3297872340425533e-06, |
| "loss": 2.8299, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.07090941322460556, |
| "grad_norm": 0.06578100472688675, |
| "learning_rate": 1.7730496453900712e-06, |
| "loss": 2.8051, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.08863676653075696, |
| "grad_norm": 0.07819516211748123, |
| "learning_rate": 2.2118794326241137e-06, |
| "loss": 2.8049, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.10636411983690836, |
| "grad_norm": 0.08765570819377899, |
| "learning_rate": 2.6551418439716316e-06, |
| "loss": 2.8175, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.12409147314305974, |
| "grad_norm": 0.10122698545455933, |
| "learning_rate": 3.098404255319149e-06, |
| "loss": 2.7791, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.14181882644921112, |
| "grad_norm": 0.11092907190322876, |
| "learning_rate": 3.5416666666666673e-06, |
| "loss": 2.7762, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.15954617975536253, |
| "grad_norm": 0.11580634117126465, |
| "learning_rate": 3.984929078014185e-06, |
| "loss": 2.7725, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.17727353306151392, |
| "grad_norm": 0.13618434965610504, |
| "learning_rate": 4.428191489361702e-06, |
| "loss": 2.7711, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.1950008863676653, |
| "grad_norm": 0.13589724898338318, |
| "learning_rate": 4.871453900709221e-06, |
| "loss": 2.7544, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.2127282396738167, |
| "grad_norm": 0.1407850682735443, |
| "learning_rate": 5.314716312056738e-06, |
| "loss": 2.7616, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.2304555929799681, |
| "grad_norm": 0.16542290151119232, |
| "learning_rate": 5.757978723404256e-06, |
| "loss": 2.7719, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.24818294628611948, |
| "grad_norm": 0.16397430002689362, |
| "learning_rate": 6.2012411347517734e-06, |
| "loss": 2.7557, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.26591029959227086, |
| "grad_norm": 0.16306352615356445, |
| "learning_rate": 6.644503546099291e-06, |
| "loss": 2.742, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.28363765289842224, |
| "grad_norm": 0.18215014040470123, |
| "learning_rate": 7.087765957446809e-06, |
| "loss": 2.7483, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.3013650062045737, |
| "grad_norm": 0.16818368434906006, |
| "learning_rate": 7.531028368794326e-06, |
| "loss": 2.7402, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.31909235951072507, |
| "grad_norm": 0.17571307718753815, |
| "learning_rate": 7.974290780141844e-06, |
| "loss": 2.7278, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.33681971281687645, |
| "grad_norm": 0.18817897140979767, |
| "learning_rate": 8.417553191489362e-06, |
| "loss": 2.7383, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.35454706612302783, |
| "grad_norm": 0.2033829391002655, |
| "learning_rate": 8.86081560283688e-06, |
| "loss": 2.735, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.3722744194291792, |
| "grad_norm": 0.19735954701900482, |
| "learning_rate": 9.304078014184398e-06, |
| "loss": 2.7255, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.3900017727353306, |
| "grad_norm": 0.22808896005153656, |
| "learning_rate": 9.747340425531916e-06, |
| "loss": 2.711, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.407729126041482, |
| "grad_norm": 0.2145451158285141, |
| "learning_rate": 1.0190602836879434e-05, |
| "loss": 2.7341, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.4254564793476334, |
| "grad_norm": 0.2078903764486313, |
| "learning_rate": 1.0633865248226952e-05, |
| "loss": 2.7165, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.4431838326537848, |
| "grad_norm": 0.2379077523946762, |
| "learning_rate": 1.107712765957447e-05, |
| "loss": 2.7087, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.4609111859599362, |
| "grad_norm": 0.21793442964553833, |
| "learning_rate": 1.1520390070921987e-05, |
| "loss": 2.7173, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.47863853926608757, |
| "grad_norm": 0.2230735719203949, |
| "learning_rate": 1.1963652482269505e-05, |
| "loss": 2.7031, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.49636589257223895, |
| "grad_norm": 0.2279297262430191, |
| "learning_rate": 1.2406914893617021e-05, |
| "loss": 2.7035, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.5140932458783903, |
| "grad_norm": 0.24638701975345612, |
| "learning_rate": 1.285017730496454e-05, |
| "loss": 2.6969, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.5318205991845417, |
| "grad_norm": 0.2320908159017563, |
| "learning_rate": 1.3293439716312057e-05, |
| "loss": 2.7033, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.5495479524906931, |
| "grad_norm": 0.23391854763031006, |
| "learning_rate": 1.3736702127659575e-05, |
| "loss": 2.6866, |
| "step": 3100 |
| }, |
| { |
| "epoch": 0.5672753057968445, |
| "grad_norm": 0.2577739357948303, |
| "learning_rate": 1.4179964539007095e-05, |
| "loss": 2.6941, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.5850026591029959, |
| "grad_norm": 0.24674548208713531, |
| "learning_rate": 1.4623226950354613e-05, |
| "loss": 2.6943, |
| "step": 3300 |
| }, |
| { |
| "epoch": 0.6027300124091474, |
| "grad_norm": 0.276777982711792, |
| "learning_rate": 1.5066489361702127e-05, |
| "loss": 2.6852, |
| "step": 3400 |
| }, |
| { |
| "epoch": 0.6204573657152987, |
| "grad_norm": 0.2453552931547165, |
| "learning_rate": 1.5509751773049645e-05, |
| "loss": 2.6822, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.6381847190214501, |
| "grad_norm": 0.2694833278656006, |
| "learning_rate": 1.5953014184397165e-05, |
| "loss": 2.6817, |
| "step": 3600 |
| }, |
| { |
| "epoch": 0.6559120723276015, |
| "grad_norm": 0.2562524080276489, |
| "learning_rate": 1.639627659574468e-05, |
| "loss": 2.6801, |
| "step": 3700 |
| }, |
| { |
| "epoch": 0.6736394256337529, |
| "grad_norm": 0.24736031889915466, |
| "learning_rate": 1.68395390070922e-05, |
| "loss": 2.6694, |
| "step": 3800 |
| }, |
| { |
| "epoch": 0.6913667789399043, |
| "grad_norm": 0.25903522968292236, |
| "learning_rate": 1.728280141843972e-05, |
| "loss": 2.6682, |
| "step": 3900 |
| }, |
| { |
| "epoch": 0.7090941322460557, |
| "grad_norm": 0.2558101713657379, |
| "learning_rate": 1.7726063829787233e-05, |
| "loss": 2.664, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.726821485552207, |
| "grad_norm": 0.25806924700737, |
| "learning_rate": 1.8169326241134752e-05, |
| "loss": 2.6725, |
| "step": 4100 |
| }, |
| { |
| "epoch": 0.7445488388583584, |
| "grad_norm": 0.26673123240470886, |
| "learning_rate": 1.8612588652482272e-05, |
| "loss": 2.6739, |
| "step": 4200 |
| }, |
| { |
| "epoch": 0.7622761921645098, |
| "grad_norm": 0.2724100947380066, |
| "learning_rate": 1.9055851063829788e-05, |
| "loss": 2.6706, |
| "step": 4300 |
| }, |
| { |
| "epoch": 0.7800035454706612, |
| "grad_norm": 0.2800888121128082, |
| "learning_rate": 1.9499113475177308e-05, |
| "loss": 2.6668, |
| "step": 4400 |
| }, |
| { |
| "epoch": 0.7977308987768126, |
| "grad_norm": 0.2775346338748932, |
| "learning_rate": 1.9942375886524824e-05, |
| "loss": 2.6652, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.815458252082964, |
| "grad_norm": 0.282071053981781, |
| "learning_rate": 1.9708392780483382e-05, |
| "loss": 2.6562, |
| "step": 4600 |
| }, |
| { |
| "epoch": 0.8331856053891155, |
| "grad_norm": 0.2759540379047394, |
| "learning_rate": 1.8676440399345303e-05, |
| "loss": 2.6686, |
| "step": 4700 |
| }, |
| { |
| "epoch": 0.8509129586952668, |
| "grad_norm": 0.27361398935317993, |
| "learning_rate": 1.6976990989182965e-05, |
| "loss": 2.6674, |
| "step": 4800 |
| }, |
| { |
| "epoch": 0.8686403120014182, |
| "grad_norm": 0.2933979332447052, |
| "learning_rate": 1.4740786810652702e-05, |
| "loss": 2.653, |
| "step": 4900 |
| }, |
| { |
| "epoch": 0.8863676653075696, |
| "grad_norm": 0.277789443731308, |
| "learning_rate": 1.2139863811304302e-05, |
| "loss": 2.6543, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.904095018613721, |
| "grad_norm": 0.2968071699142456, |
| "learning_rate": 9.374316533931709e-06, |
| "loss": 2.6589, |
| "step": 5100 |
| }, |
| { |
| "epoch": 0.9218223719198724, |
| "grad_norm": 0.2793715000152588, |
| "learning_rate": 6.656904417464698e-06, |
| "loss": 2.6495, |
| "step": 5200 |
| }, |
| { |
| "epoch": 0.9395497252260238, |
| "grad_norm": 0.27239373326301575, |
| "learning_rate": 4.196683760441683e-06, |
| "loss": 2.6603, |
| "step": 5300 |
| }, |
| { |
| "epoch": 0.9572770785321751, |
| "grad_norm": 0.2786346673965454, |
| "learning_rate": 2.1829245749555094e-06, |
| "loss": 2.6586, |
| "step": 5400 |
| }, |
| { |
| "epoch": 0.9750044318383265, |
| "grad_norm": 0.27085384726524353, |
| "learning_rate": 7.705496417336055e-07, |
| "loss": 2.6533, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.9927317851444779, |
| "grad_norm": 0.29114964604377747, |
| "learning_rate": 6.821597084026966e-08, |
| "loss": 2.6639, |
| "step": 5600 |
| }, |
| { |
| "epoch": 1.0, |
| "step": 5641, |
| "total_flos": 8.219951597501809e+17, |
| "train_loss": 2.7133784883306755, |
| "train_runtime": 1594.1832, |
| "train_samples_per_second": 56.61, |
| "train_steps_per_second": 3.538 |
| } |
| ], |
| "logging_steps": 100, |
| "max_steps": 5641, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 8.219951597501809e+17, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|