| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 5.946341463414634, |
| "eval_steps": 500, |
| "global_step": 612, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0975609756097561, |
| "grad_norm": 5.756045341491699, |
| "learning_rate": 1.4516129032258066e-06, |
| "loss": 0.8809, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.1951219512195122, |
| "grad_norm": 3.873936176300049, |
| "learning_rate": 3.0645161290322584e-06, |
| "loss": 0.8044, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.2926829268292683, |
| "grad_norm": 1.6314491033554077, |
| "learning_rate": 4.67741935483871e-06, |
| "loss": 0.7459, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.3902439024390244, |
| "grad_norm": 2.0878822803497314, |
| "learning_rate": 6.290322580645162e-06, |
| "loss": 0.727, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.4878048780487805, |
| "grad_norm": 1.7192010879516602, |
| "learning_rate": 7.903225806451613e-06, |
| "loss": 0.6279, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.5853658536585366, |
| "grad_norm": 1.5417181253433228, |
| "learning_rate": 9.516129032258065e-06, |
| "loss": 0.6367, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.6829268292682927, |
| "grad_norm": 1.7374225854873657, |
| "learning_rate": 9.996003750499608e-06, |
| "loss": 0.689, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.7804878048780488, |
| "grad_norm": 1.3835179805755615, |
| "learning_rate": 9.976445660039118e-06, |
| "loss": 0.6537, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.8780487804878049, |
| "grad_norm": 1.206093192100525, |
| "learning_rate": 9.940655438201113e-06, |
| "loss": 0.6421, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.975609756097561, |
| "grad_norm": 1.456930160522461, |
| "learning_rate": 9.888749825250151e-06, |
| "loss": 0.6039, |
| "step": 100 |
| }, |
| { |
| "epoch": 1.0682926829268293, |
| "grad_norm": 1.419948935508728, |
| "learning_rate": 9.820898126506978e-06, |
| "loss": 0.5969, |
| "step": 110 |
| }, |
| { |
| "epoch": 1.1658536585365853, |
| "grad_norm": 1.9503365755081177, |
| "learning_rate": 9.737321660109767e-06, |
| "loss": 0.516, |
| "step": 120 |
| }, |
| { |
| "epoch": 1.2634146341463415, |
| "grad_norm": 1.587859869003296, |
| "learning_rate": 9.638293035120342e-06, |
| "loss": 0.5287, |
| "step": 130 |
| }, |
| { |
| "epoch": 1.3609756097560974, |
| "grad_norm": 1.4838457107543945, |
| "learning_rate": 9.524135262330098e-06, |
| "loss": 0.5411, |
| "step": 140 |
| }, |
| { |
| "epoch": 1.4585365853658536, |
| "grad_norm": 1.41899836063385, |
| "learning_rate": 9.395220700665924e-06, |
| "loss": 0.5023, |
| "step": 150 |
| }, |
| { |
| "epoch": 1.5560975609756098, |
| "grad_norm": 1.7997419834136963, |
| "learning_rate": 9.251969842632785e-06, |
| "loss": 0.5487, |
| "step": 160 |
| }, |
| { |
| "epoch": 1.653658536585366, |
| "grad_norm": 1.1504219770431519, |
| "learning_rate": 9.094849942754564e-06, |
| "loss": 0.491, |
| "step": 170 |
| }, |
| { |
| "epoch": 1.751219512195122, |
| "grad_norm": 1.4111475944519043, |
| "learning_rate": 8.924373493486941e-06, |
| "loss": 0.4901, |
| "step": 180 |
| }, |
| { |
| "epoch": 1.848780487804878, |
| "grad_norm": 1.6067090034484863, |
| "learning_rate": 8.741096553573506e-06, |
| "loss": 0.5439, |
| "step": 190 |
| }, |
| { |
| "epoch": 1.946341463414634, |
| "grad_norm": 1.2158631086349487, |
| "learning_rate": 8.545616934297733e-06, |
| "loss": 0.5212, |
| "step": 200 |
| }, |
| { |
| "epoch": 2.0390243902439025, |
| "grad_norm": 1.3324558734893799, |
| "learning_rate": 8.338572249546813e-06, |
| "loss": 0.49, |
| "step": 210 |
| }, |
| { |
| "epoch": 2.1365853658536587, |
| "grad_norm": 1.4280637502670288, |
| "learning_rate": 8.120637836047698e-06, |
| "loss": 0.4122, |
| "step": 220 |
| }, |
| { |
| "epoch": 2.234146341463415, |
| "grad_norm": 1.0540653467178345, |
| "learning_rate": 7.892524550559056e-06, |
| "loss": 0.3693, |
| "step": 230 |
| }, |
| { |
| "epoch": 2.3317073170731706, |
| "grad_norm": 1.2099759578704834, |
| "learning_rate": 7.654976451204288e-06, |
| "loss": 0.3851, |
| "step": 240 |
| }, |
| { |
| "epoch": 2.4292682926829268, |
| "grad_norm": 1.056799292564392, |
| "learning_rate": 7.408768370508577e-06, |
| "loss": 0.377, |
| "step": 250 |
| }, |
| { |
| "epoch": 2.526829268292683, |
| "grad_norm": 1.4203627109527588, |
| "learning_rate": 7.154703388056246e-06, |
| "loss": 0.3864, |
| "step": 260 |
| }, |
| { |
| "epoch": 2.624390243902439, |
| "grad_norm": 0.9842203259468079, |
| "learning_rate": 6.893610211012067e-06, |
| "loss": 0.3881, |
| "step": 270 |
| }, |
| { |
| "epoch": 2.721951219512195, |
| "grad_norm": 0.9598954916000366, |
| "learning_rate": 6.6263404710507495e-06, |
| "loss": 0.4051, |
| "step": 280 |
| }, |
| { |
| "epoch": 2.819512195121951, |
| "grad_norm": 1.2445122003555298, |
| "learning_rate": 6.3537659465114275e-06, |
| "loss": 0.4061, |
| "step": 290 |
| }, |
| { |
| "epoch": 2.9170731707317072, |
| "grad_norm": 1.1891133785247803, |
| "learning_rate": 6.076775718837911e-06, |
| "loss": 0.3537, |
| "step": 300 |
| }, |
| { |
| "epoch": 3.0097560975609756, |
| "grad_norm": 1.345109224319458, |
| "learning_rate": 5.796273272579823e-06, |
| "loss": 0.3309, |
| "step": 310 |
| }, |
| { |
| "epoch": 3.107317073170732, |
| "grad_norm": 0.9590056538581848, |
| "learning_rate": 5.513173548413789e-06, |
| "loss": 0.2277, |
| "step": 320 |
| }, |
| { |
| "epoch": 3.204878048780488, |
| "grad_norm": 1.3723112344741821, |
| "learning_rate": 5.228399958797117e-06, |
| "loss": 0.2754, |
| "step": 330 |
| }, |
| { |
| "epoch": 3.3024390243902437, |
| "grad_norm": 1.052578091621399, |
| "learning_rate": 4.9428813759883e-06, |
| "loss": 0.2226, |
| "step": 340 |
| }, |
| { |
| "epoch": 3.4, |
| "grad_norm": 1.3540409803390503, |
| "learning_rate": 4.6575491022587714e-06, |
| "loss": 0.3032, |
| "step": 350 |
| }, |
| { |
| "epoch": 3.497560975609756, |
| "grad_norm": 0.979840099811554, |
| "learning_rate": 4.373333832178478e-06, |
| "loss": 0.2326, |
| "step": 360 |
| }, |
| { |
| "epoch": 3.5951219512195123, |
| "grad_norm": 1.1373505592346191, |
| "learning_rate": 4.091162616883634e-06, |
| "loss": 0.3115, |
| "step": 370 |
| }, |
| { |
| "epoch": 3.692682926829268, |
| "grad_norm": 1.0093439817428589, |
| "learning_rate": 3.8119558402285994e-06, |
| "loss": 0.2202, |
| "step": 380 |
| }, |
| { |
| "epoch": 3.790243902439024, |
| "grad_norm": 1.2901158332824707, |
| "learning_rate": 3.5366242166850624e-06, |
| "loss": 0.2969, |
| "step": 390 |
| }, |
| { |
| "epoch": 3.8878048780487804, |
| "grad_norm": 0.9081848859786987, |
| "learning_rate": 3.2660658207807205e-06, |
| "loss": 0.237, |
| "step": 400 |
| }, |
| { |
| "epoch": 3.9853658536585366, |
| "grad_norm": 1.0115485191345215, |
| "learning_rate": 3.0011631577668327e-06, |
| "loss": 0.2668, |
| "step": 410 |
| }, |
| { |
| "epoch": 4.078048780487805, |
| "grad_norm": 1.3101578950881958, |
| "learning_rate": 2.7427802850695306e-06, |
| "loss": 0.1837, |
| "step": 420 |
| }, |
| { |
| "epoch": 4.175609756097561, |
| "grad_norm": 1.2633917331695557, |
| "learning_rate": 2.4917599939140884e-06, |
| "loss": 0.196, |
| "step": 430 |
| }, |
| { |
| "epoch": 4.273170731707317, |
| "grad_norm": 0.7994241714477539, |
| "learning_rate": 2.2489210603151144e-06, |
| "loss": 0.1483, |
| "step": 440 |
| }, |
| { |
| "epoch": 4.3707317073170735, |
| "grad_norm": 1.1350624561309814, |
| "learning_rate": 2.015055574399388e-06, |
| "loss": 0.1495, |
| "step": 450 |
| }, |
| { |
| "epoch": 4.46829268292683, |
| "grad_norm": 1.1117892265319824, |
| "learning_rate": 1.7909263567724917e-06, |
| "loss": 0.1489, |
| "step": 460 |
| }, |
| { |
| "epoch": 4.565853658536585, |
| "grad_norm": 0.9958974719047546, |
| "learning_rate": 1.5772644703565564e-06, |
| "loss": 0.1702, |
| "step": 470 |
| }, |
| { |
| "epoch": 4.663414634146341, |
| "grad_norm": 1.1438698768615723, |
| "learning_rate": 1.3747668358149658e-06, |
| "loss": 0.1774, |
| "step": 480 |
| }, |
| { |
| "epoch": 4.760975609756097, |
| "grad_norm": 0.9679899215698242, |
| "learning_rate": 1.1840939583419986e-06, |
| "loss": 0.1462, |
| "step": 490 |
| }, |
| { |
| "epoch": 4.8585365853658535, |
| "grad_norm": 1.017114520072937, |
| "learning_rate": 1.0058677732321826e-06, |
| "loss": 0.1865, |
| "step": 500 |
| }, |
| { |
| "epoch": 4.95609756097561, |
| "grad_norm": 1.2413111925125122, |
| "learning_rate": 8.406696172566258e-07, |
| "loss": 0.1795, |
| "step": 510 |
| }, |
| { |
| "epoch": 5.048780487804878, |
| "grad_norm": 0.7522889971733093, |
| "learning_rate": 6.890383324633121e-07, |
| "loss": 0.157, |
| "step": 520 |
| }, |
| { |
| "epoch": 5.146341463414634, |
| "grad_norm": 0.9641028046607971, |
| "learning_rate": 5.514685085863286e-07, |
| "loss": 0.1632, |
| "step": 530 |
| }, |
| { |
| "epoch": 5.2439024390243905, |
| "grad_norm": 1.0888645648956299, |
| "learning_rate": 4.2840886979696074e-07, |
| "loss": 0.1269, |
| "step": 540 |
| }, |
| { |
| "epoch": 5.341463414634147, |
| "grad_norm": 0.7203080058097839, |
| "learning_rate": 3.2026081105871634e-07, |
| "loss": 0.1096, |
| "step": 550 |
| }, |
| { |
| "epoch": 5.439024390243903, |
| "grad_norm": 1.3017480373382568, |
| "learning_rate": 2.2737708886037823e-07, |
| "loss": 0.1133, |
| "step": 560 |
| }, |
| { |
| "epoch": 5.536585365853659, |
| "grad_norm": 0.9247483015060425, |
| "learning_rate": 1.5006067059766484e-07, |
| "loss": 0.1188, |
| "step": 570 |
| }, |
| { |
| "epoch": 5.634146341463414, |
| "grad_norm": 0.8894818425178528, |
| "learning_rate": 8.856374635655696e-08, |
| "loss": 0.1069, |
| "step": 580 |
| }, |
| { |
| "epoch": 5.7317073170731705, |
| "grad_norm": 0.981769323348999, |
| "learning_rate": 4.3086906321652136e-08, |
| "loss": 0.0943, |
| "step": 590 |
| }, |
| { |
| "epoch": 5.829268292682927, |
| "grad_norm": 0.441194087266922, |
| "learning_rate": 1.3778486492681542e-08, |
| "loss": 0.1061, |
| "step": 600 |
| }, |
| { |
| "epoch": 5.926829268292683, |
| "grad_norm": 0.8621994256973267, |
| "learning_rate": 7.340848433040615e-10, |
| "loss": 0.1294, |
| "step": 610 |
| }, |
| { |
| "epoch": 5.946341463414634, |
| "step": 612, |
| "total_flos": 66516718592000.0, |
| "train_loss": 0.3622722171880061, |
| "train_runtime": 10104.8075, |
| "train_samples_per_second": 0.485, |
| "train_steps_per_second": 0.061 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 612, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 6, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 66516718592000.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|