{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.6210045662100456,
  "eval_steps": 500,
  "global_step": 45,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0365296803652968,
      "grad_norm": 0.0023122017737478018,
      "learning_rate": 0.0,
      "loss": 0.5373,
      "step": 1
    },
    {
      "epoch": 0.0730593607305936,
      "grad_norm": 0.002487431513145566,
      "learning_rate": 2e-05,
      "loss": 0.5679,
      "step": 2
    },
    {
      "epoch": 0.1095890410958904,
      "grad_norm": 0.0024793234188109636,
      "learning_rate": 2e-05,
      "loss": 0.5436,
      "step": 3
    },
    {
      "epoch": 0.1461187214611872,
      "grad_norm": 0.0010261598508805037,
      "learning_rate": 1.962962962962963e-05,
      "loss": 0.4465,
      "step": 4
    },
    {
      "epoch": 0.182648401826484,
      "grad_norm": 0.000856598315294832,
      "learning_rate": 1.925925925925926e-05,
      "loss": 0.4004,
      "step": 5
    },
    {
      "epoch": 0.2191780821917808,
      "grad_norm": 0.0007980006048455834,
      "learning_rate": 1.888888888888889e-05,
      "loss": 0.3967,
      "step": 6
    },
    {
      "epoch": 0.2557077625570776,
      "grad_norm": 0.0006401128484867513,
      "learning_rate": 1.851851851851852e-05,
      "loss": 0.3734,
      "step": 7
    },
    {
      "epoch": 0.2922374429223744,
      "grad_norm": 0.0005659515736624599,
      "learning_rate": 1.814814814814815e-05,
      "loss": 0.3909,
      "step": 8
    },
    {
      "epoch": 0.3287671232876712,
      "grad_norm": 0.0005246418877504766,
      "learning_rate": 1.7777777777777777e-05,
      "loss": 0.3477,
      "step": 9
    },
    {
      "epoch": 0.365296803652968,
      "grad_norm": 0.0005232089897617698,
      "learning_rate": 1.740740740740741e-05,
      "loss": 0.3427,
      "step": 10
    },
    {
      "epoch": 0.4018264840182648,
      "grad_norm": 0.0005015431670472026,
      "learning_rate": 1.7037037037037038e-05,
      "loss": 0.3477,
      "step": 11
    },
    {
      "epoch": 0.4383561643835616,
      "grad_norm": 0.0004877804312855005,
      "learning_rate": 1.6666666666666667e-05,
      "loss": 0.3399,
      "step": 12
    },
    {
      "epoch": 0.4748858447488584,
      "grad_norm": 0.0004098584468010813,
      "learning_rate": 1.6296296296296297e-05,
      "loss": 0.3522,
      "step": 13
    },
    {
      "epoch": 0.5114155251141552,
      "grad_norm": 0.00042966814362443984,
      "learning_rate": 1.5925925925925926e-05,
      "loss": 0.3444,
      "step": 14
    },
    {
      "epoch": 0.547945205479452,
      "grad_norm": 0.00046070184907875955,
      "learning_rate": 1.555555555555556e-05,
      "loss": 0.3206,
      "step": 15
    },
    {
      "epoch": 0.5844748858447488,
      "grad_norm": 0.0006697291391901672,
      "learning_rate": 1.5185185185185187e-05,
      "loss": 0.3342,
      "step": 16
    },
    {
      "epoch": 0.6210045662100456,
      "grad_norm": 0.0003732343902811408,
      "learning_rate": 1.4814814814814815e-05,
      "loss": 0.3338,
      "step": 17
    },
    {
      "epoch": 0.6575342465753424,
      "grad_norm": 0.00033160060411319137,
      "learning_rate": 1.4444444444444446e-05,
      "loss": 0.3229,
      "step": 18
    },
    {
      "epoch": 0.6940639269406392,
      "grad_norm": 0.00037524307845160365,
      "learning_rate": 1.4074074074074075e-05,
      "loss": 0.3481,
      "step": 19
    },
    {
      "epoch": 0.730593607305936,
      "grad_norm": 0.00035436192410998046,
      "learning_rate": 1.3703703703703706e-05,
      "loss": 0.3116,
      "step": 20
    },
    {
      "epoch": 0.7671232876712328,
      "grad_norm": 0.0003573898575268686,
      "learning_rate": 1.3333333333333333e-05,
      "loss": 0.3052,
      "step": 21
    },
    {
      "epoch": 0.8036529680365296,
      "grad_norm": 0.00043323307181708515,
      "learning_rate": 1.2962962962962964e-05,
      "loss": 0.284,
      "step": 22
    },
    {
      "epoch": 0.8401826484018264,
      "grad_norm": 0.0002917552483268082,
      "learning_rate": 1.2592592592592593e-05,
      "loss": 0.3194,
      "step": 23
    },
    {
      "epoch": 0.8767123287671232,
      "grad_norm": 0.0003827356267720461,
      "learning_rate": 1.2222222222222224e-05,
      "loss": 0.3147,
      "step": 24
    },
    {
      "epoch": 0.91324200913242,
      "grad_norm": 0.00035413680598139763,
      "learning_rate": 1.1851851851851852e-05,
      "loss": 0.3013,
      "step": 25
    },
    {
      "epoch": 0.9497716894977168,
      "grad_norm": 0.0003756983787752688,
      "learning_rate": 1.1481481481481482e-05,
      "loss": 0.3059,
      "step": 26
    },
    {
      "epoch": 0.9863013698630136,
      "grad_norm": 0.00033429820905439556,
      "learning_rate": 1.1111111111111113e-05,
      "loss": 0.2919,
      "step": 27
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.0015006560133770108,
      "learning_rate": 1.0740740740740742e-05,
      "loss": 0.2799,
      "step": 28
    },
    {
      "epoch": 1.0365296803652968,
      "grad_norm": 0.0003345690493006259,
      "learning_rate": 1.037037037037037e-05,
      "loss": 0.2892,
      "step": 29
    },
    {
      "epoch": 1.0730593607305936,
      "grad_norm": 0.00033236839226447046,
      "learning_rate": 1e-05,
      "loss": 0.2808,
      "step": 30
    },
    {
      "epoch": 1.1095890410958904,
      "grad_norm": 0.0002631679526530206,
      "learning_rate": 9.62962962962963e-06,
      "loss": 0.2771,
      "step": 31
    },
    {
      "epoch": 1.1461187214611872,
      "grad_norm": 0.0002748680708464235,
      "learning_rate": 9.25925925925926e-06,
      "loss": 0.2766,
      "step": 32
    },
    {
      "epoch": 1.182648401826484,
      "grad_norm": 0.00031516075250692666,
      "learning_rate": 8.888888888888888e-06,
      "loss": 0.2888,
      "step": 33
    },
    {
      "epoch": 1.2191780821917808,
      "grad_norm": 0.00029519532108679414,
      "learning_rate": 8.518518518518519e-06,
      "loss": 0.2796,
      "step": 34
    },
    {
      "epoch": 1.2557077625570776,
      "grad_norm": 0.0005835472838953137,
      "learning_rate": 8.148148148148148e-06,
      "loss": 0.3151,
      "step": 35
    },
    {
      "epoch": 1.2922374429223744,
      "grad_norm": 0.0004080029611941427,
      "learning_rate": 7.77777777777778e-06,
      "loss": 0.2764,
      "step": 36
    },
    {
      "epoch": 1.3287671232876712,
      "grad_norm": 0.0004656020610127598,
      "learning_rate": 7.4074074074074075e-06,
      "loss": 0.3262,
      "step": 37
    },
    {
      "epoch": 1.365296803652968,
      "grad_norm": 0.00028429756639525294,
      "learning_rate": 7.0370370370370375e-06,
      "loss": 0.2802,
      "step": 38
    },
    {
      "epoch": 1.4018264840182648,
      "grad_norm": 0.00030639086617156863,
      "learning_rate": 6.666666666666667e-06,
      "loss": 0.255,
      "step": 39
    },
    {
      "epoch": 1.4383561643835616,
      "grad_norm": 0.00031736379605717957,
      "learning_rate": 6.296296296296297e-06,
      "loss": 0.2443,
      "step": 40
    },
    {
      "epoch": 1.4748858447488584,
      "grad_norm": 0.0002719672629609704,
      "learning_rate": 5.925925925925926e-06,
      "loss": 0.2877,
      "step": 41
    },
    {
      "epoch": 1.5114155251141552,
      "grad_norm": 0.0003220531507395208,
      "learning_rate": 5.555555555555557e-06,
      "loss": 0.2799,
      "step": 42
    },
    {
      "epoch": 1.547945205479452,
      "grad_norm": 0.00034380314173176885,
      "learning_rate": 5.185185185185185e-06,
      "loss": 0.2919,
      "step": 43
    },
    {
      "epoch": 1.5844748858447488,
      "grad_norm": 0.0004067339759785682,
      "learning_rate": 4.814814814814815e-06,
      "loss": 0.2788,
      "step": 44
    },
    {
      "epoch": 1.6210045662100456,
      "grad_norm": 0.00034477008739486337,
      "learning_rate": 4.444444444444444e-06,
      "loss": 0.2453,
      "step": 45
    }
  ],
  "logging_steps": 1.0,
  "max_steps": 56,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 15,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 6.214917272234885e+17,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}