working / last-checkpoint /trainer_state.json
youssefedweqd's picture
Training in progress, step 1500, checkpoint
c2cae9c verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.766671573678787,
"eval_steps": 500,
"global_step": 1500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.014721036360959812,
"grad_norm": 1.0893694162368774,
"learning_rate": 4.808635917566242e-06,
"loss": 1.1444,
"step": 50
},
{
"epoch": 0.029442072721919624,
"grad_norm": 1.118213415145874,
"learning_rate": 9.715407262021591e-06,
"loss": 0.9497,
"step": 100
},
{
"epoch": 0.04416310908287943,
"grad_norm": 1.3216179609298706,
"learning_rate": 1.4622178606476939e-05,
"loss": 0.8346,
"step": 150
},
{
"epoch": 0.05888414544383925,
"grad_norm": 1.292870044708252,
"learning_rate": 1.9528949950932288e-05,
"loss": 0.7699,
"step": 200
},
{
"epoch": 0.07360518180479905,
"grad_norm": 1.4046356678009033,
"learning_rate": 2.4435721295387637e-05,
"loss": 0.7252,
"step": 250
},
{
"epoch": 0.08832621816575886,
"grad_norm": 1.4438663721084595,
"learning_rate": 2.9342492639842983e-05,
"loss": 0.6996,
"step": 300
},
{
"epoch": 0.10304725452671869,
"grad_norm": 1.3257337808609009,
"learning_rate": 3.424926398429833e-05,
"loss": 0.686,
"step": 350
},
{
"epoch": 0.1177682908876785,
"grad_norm": 1.3639789819717407,
"learning_rate": 3.9156035328753685e-05,
"loss": 0.6601,
"step": 400
},
{
"epoch": 0.1324893272486383,
"grad_norm": 1.4148070812225342,
"learning_rate": 4.406280667320903e-05,
"loss": 0.6371,
"step": 450
},
{
"epoch": 0.1472103636095981,
"grad_norm": 1.3940412998199463,
"learning_rate": 4.8969578017664384e-05,
"loss": 0.6417,
"step": 500
},
{
"epoch": 0.6477255998822317,
"grad_norm": 0.6072946190834045,
"learning_rate": 9.599483839268026e-05,
"loss": 0.6122,
"step": 550
},
{
"epoch": 0.7066097453260709,
"grad_norm": 0.6030572652816772,
"learning_rate": 9.454410179022932e-05,
"loss": 0.5809,
"step": 600
},
{
"epoch": 0.7654938907699103,
"grad_norm": 0.5781008005142212,
"learning_rate": 9.288422825194501e-05,
"loss": 0.5446,
"step": 650
},
{
"epoch": 0.8243780362137495,
"grad_norm": 0.5412103533744812,
"learning_rate": 9.102301097269974e-05,
"loss": 0.5339,
"step": 700
},
{
"epoch": 0.8832621816575887,
"grad_norm": 0.5678456425666809,
"learning_rate": 8.896918846697821e-05,
"loss": 0.5296,
"step": 750
},
{
"epoch": 0.942146327101428,
"grad_norm": 0.525556206703186,
"learning_rate": 8.673240354108538e-05,
"loss": 0.5176,
"step": 800
},
{
"epoch": 1.0011776829088768,
"grad_norm": 1.9685856103897095,
"learning_rate": 8.432315801965616e-05,
"loss": 0.5104,
"step": 850
},
{
"epoch": 1.0600618283527161,
"grad_norm": 0.6006094217300415,
"learning_rate": 8.175276343902802e-05,
"loss": 0.4685,
"step": 900
},
{
"epoch": 1.1189459737965552,
"grad_norm": 0.5228903889656067,
"learning_rate": 7.903328793897418e-05,
"loss": 0.473,
"step": 950
},
{
"epoch": 1.1778301192403946,
"grad_norm": 0.5006899237632751,
"learning_rate": 7.6177499602143e-05,
"loss": 0.4679,
"step": 1000
},
{
"epoch": 1.1778301192403946,
"eval_loss": 0.4844963848590851,
"eval_runtime": 2172.4438,
"eval_samples_per_second": 1.39,
"eval_steps_per_second": 0.695,
"step": 1000
},
{
"epoch": 1.2367142646842337,
"grad_norm": 0.5041179060935974,
"learning_rate": 7.319880650722838e-05,
"loss": 0.4541,
"step": 1050
},
{
"epoch": 1.295598410128073,
"grad_norm": 0.5369197726249695,
"learning_rate": 7.01111937773246e-05,
"loss": 0.4576,
"step": 1100
},
{
"epoch": 1.3544825555719122,
"grad_norm": 0.5211925506591797,
"learning_rate": 6.692915791902665e-05,
"loss": 0.4472,
"step": 1150
},
{
"epoch": 1.4133667010157516,
"grad_norm": 0.5664705038070679,
"learning_rate": 6.366763876055806e-05,
"loss": 0.4427,
"step": 1200
},
{
"epoch": 1.4722508464595907,
"grad_norm": 0.5420666337013245,
"learning_rate": 6.034194930847975e-05,
"loss": 0.4395,
"step": 1250
},
{
"epoch": 1.53113499190343,
"grad_norm": 0.558952271938324,
"learning_rate": 5.6967703852306786e-05,
"loss": 0.4305,
"step": 1300
},
{
"epoch": 1.5900191373472694,
"grad_norm": 0.510136067867279,
"learning_rate": 5.356074465458553e-05,
"loss": 0.428,
"step": 1350
},
{
"epoch": 1.6489032827911085,
"grad_norm": 0.506799578666687,
"learning_rate": 5.013706757062534e-05,
"loss": 0.4251,
"step": 1400
},
{
"epoch": 1.7077874282349477,
"grad_norm": 0.5179591178894043,
"learning_rate": 4.671274694710388e-05,
"loss": 0.4188,
"step": 1450
},
{
"epoch": 1.766671573678787,
"grad_norm": 0.531908392906189,
"learning_rate": 4.3303860152151445e-05,
"loss": 0.4177,
"step": 1500
},
{
"epoch": 1.766671573678787,
"eval_loss": 0.4321376383304596,
"eval_runtime": 2174.1694,
"eval_samples_per_second": 1.389,
"eval_steps_per_second": 0.695,
"step": 1500
}
],
"logging_steps": 50,
"max_steps": 2547,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 2.011350882666414e+17,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}