{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.6210045662100456, "eval_steps": 500, "global_step": 45, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0365296803652968, "grad_norm": 0.0023122017737478018, "learning_rate": 0.0, "loss": 0.5373, "step": 1 }, { "epoch": 0.0730593607305936, "grad_norm": 0.002487431513145566, "learning_rate": 2e-05, "loss": 0.5679, "step": 2 }, { "epoch": 0.1095890410958904, "grad_norm": 0.0024793234188109636, "learning_rate": 2e-05, "loss": 0.5436, "step": 3 }, { "epoch": 0.1461187214611872, "grad_norm": 0.0010261598508805037, "learning_rate": 1.962962962962963e-05, "loss": 0.4465, "step": 4 }, { "epoch": 0.182648401826484, "grad_norm": 0.000856598315294832, "learning_rate": 1.925925925925926e-05, "loss": 0.4004, "step": 5 }, { "epoch": 0.2191780821917808, "grad_norm": 0.0007980006048455834, "learning_rate": 1.888888888888889e-05, "loss": 0.3967, "step": 6 }, { "epoch": 0.2557077625570776, "grad_norm": 0.0006401128484867513, "learning_rate": 1.851851851851852e-05, "loss": 0.3734, "step": 7 }, { "epoch": 0.2922374429223744, "grad_norm": 0.0005659515736624599, "learning_rate": 1.814814814814815e-05, "loss": 0.3909, "step": 8 }, { "epoch": 0.3287671232876712, "grad_norm": 0.0005246418877504766, "learning_rate": 1.7777777777777777e-05, "loss": 0.3477, "step": 9 }, { "epoch": 0.365296803652968, "grad_norm": 0.0005232089897617698, "learning_rate": 1.740740740740741e-05, "loss": 0.3427, "step": 10 }, { "epoch": 0.4018264840182648, "grad_norm": 0.0005015431670472026, "learning_rate": 1.7037037037037038e-05, "loss": 0.3477, "step": 11 }, { "epoch": 0.4383561643835616, "grad_norm": 0.0004877804312855005, "learning_rate": 1.6666666666666667e-05, "loss": 0.3399, "step": 12 }, { "epoch": 0.4748858447488584, "grad_norm": 0.0004098584468010813, "learning_rate": 1.6296296296296297e-05, "loss": 0.3522, "step": 13 }, { "epoch": 0.5114155251141552, "grad_norm": 0.00042966814362443984, "learning_rate": 1.5925925925925926e-05, "loss": 0.3444, "step": 14 }, { "epoch": 0.547945205479452, "grad_norm": 0.00046070184907875955, "learning_rate": 1.555555555555556e-05, "loss": 0.3206, "step": 15 }, { "epoch": 0.5844748858447488, "grad_norm": 0.0006697291391901672, "learning_rate": 1.5185185185185187e-05, "loss": 0.3342, "step": 16 }, { "epoch": 0.6210045662100456, "grad_norm": 0.0003732343902811408, "learning_rate": 1.4814814814814815e-05, "loss": 0.3338, "step": 17 }, { "epoch": 0.6575342465753424, "grad_norm": 0.00033160060411319137, "learning_rate": 1.4444444444444446e-05, "loss": 0.3229, "step": 18 }, { "epoch": 0.6940639269406392, "grad_norm": 0.00037524307845160365, "learning_rate": 1.4074074074074075e-05, "loss": 0.3481, "step": 19 }, { "epoch": 0.730593607305936, "grad_norm": 0.00035436192410998046, "learning_rate": 1.3703703703703706e-05, "loss": 0.3116, "step": 20 }, { "epoch": 0.7671232876712328, "grad_norm": 0.0003573898575268686, "learning_rate": 1.3333333333333333e-05, "loss": 0.3052, "step": 21 }, { "epoch": 0.8036529680365296, "grad_norm": 0.00043323307181708515, "learning_rate": 1.2962962962962964e-05, "loss": 0.284, "step": 22 }, { "epoch": 0.8401826484018264, "grad_norm": 0.0002917552483268082, "learning_rate": 1.2592592592592593e-05, "loss": 0.3194, "step": 23 }, { "epoch": 0.8767123287671232, "grad_norm": 0.0003827356267720461, "learning_rate": 1.2222222222222224e-05, "loss": 0.3147, "step": 24 }, { "epoch": 0.91324200913242, "grad_norm": 0.00035413680598139763, "learning_rate": 1.1851851851851852e-05, "loss": 0.3013, "step": 25 }, { "epoch": 0.9497716894977168, "grad_norm": 0.0003756983787752688, "learning_rate": 1.1481481481481482e-05, "loss": 0.3059, "step": 26 }, { "epoch": 0.9863013698630136, "grad_norm": 0.00033429820905439556, "learning_rate": 1.1111111111111113e-05, "loss": 0.2919, "step": 27 }, { "epoch": 1.0, "grad_norm": 0.0015006560133770108, "learning_rate": 1.0740740740740742e-05, "loss": 0.2799, "step": 28 }, { "epoch": 1.0365296803652968, "grad_norm": 0.0003345690493006259, "learning_rate": 1.037037037037037e-05, "loss": 0.2892, "step": 29 }, { "epoch": 1.0730593607305936, "grad_norm": 0.00033236839226447046, "learning_rate": 1e-05, "loss": 0.2808, "step": 30 }, { "epoch": 1.1095890410958904, "grad_norm": 0.0002631679526530206, "learning_rate": 9.62962962962963e-06, "loss": 0.2771, "step": 31 }, { "epoch": 1.1461187214611872, "grad_norm": 0.0002748680708464235, "learning_rate": 9.25925925925926e-06, "loss": 0.2766, "step": 32 }, { "epoch": 1.182648401826484, "grad_norm": 0.00031516075250692666, "learning_rate": 8.888888888888888e-06, "loss": 0.2888, "step": 33 }, { "epoch": 1.2191780821917808, "grad_norm": 0.00029519532108679414, "learning_rate": 8.518518518518519e-06, "loss": 0.2796, "step": 34 }, { "epoch": 1.2557077625570776, "grad_norm": 0.0005835472838953137, "learning_rate": 8.148148148148148e-06, "loss": 0.3151, "step": 35 }, { "epoch": 1.2922374429223744, "grad_norm": 0.0004080029611941427, "learning_rate": 7.77777777777778e-06, "loss": 0.2764, "step": 36 }, { "epoch": 1.3287671232876712, "grad_norm": 0.0004656020610127598, "learning_rate": 7.4074074074074075e-06, "loss": 0.3262, "step": 37 }, { "epoch": 1.365296803652968, "grad_norm": 0.00028429756639525294, "learning_rate": 7.0370370370370375e-06, "loss": 0.2802, "step": 38 }, { "epoch": 1.4018264840182648, "grad_norm": 0.00030639086617156863, "learning_rate": 6.666666666666667e-06, "loss": 0.255, "step": 39 }, { "epoch": 1.4383561643835616, "grad_norm": 0.00031736379605717957, "learning_rate": 6.296296296296297e-06, "loss": 0.2443, "step": 40 }, { "epoch": 1.4748858447488584, "grad_norm": 0.0002719672629609704, "learning_rate": 5.925925925925926e-06, "loss": 0.2877, "step": 41 }, { "epoch": 1.5114155251141552, "grad_norm": 0.0003220531507395208, "learning_rate": 5.555555555555557e-06, "loss": 0.2799, "step": 42 }, { "epoch": 1.547945205479452, "grad_norm": 0.00034380314173176885, "learning_rate": 5.185185185185185e-06, "loss": 0.2919, "step": 43 }, { "epoch": 1.5844748858447488, "grad_norm": 0.0004067339759785682, "learning_rate": 4.814814814814815e-06, "loss": 0.2788, "step": 44 }, { "epoch": 1.6210045662100456, "grad_norm": 0.00034477008739486337, "learning_rate": 4.444444444444444e-06, "loss": 0.2453, "step": 45 } ], "logging_steps": 1.0, "max_steps": 56, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 15, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 6.214917272234885e+17, "train_batch_size": 1, "trial_name": null, "trial_params": null }