{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.1778301192403946,
"eval_steps": 500,
"global_step": 1000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.014721036360959812,
"grad_norm": 1.0893694162368774,
"learning_rate": 4.808635917566242e-06,
"loss": 1.1444,
"step": 50
},
{
"epoch": 0.029442072721919624,
"grad_norm": 1.118213415145874,
"learning_rate": 9.715407262021591e-06,
"loss": 0.9497,
"step": 100
},
{
"epoch": 0.04416310908287943,
"grad_norm": 1.3216179609298706,
"learning_rate": 1.4622178606476939e-05,
"loss": 0.8346,
"step": 150
},
{
"epoch": 0.05888414544383925,
"grad_norm": 1.292870044708252,
"learning_rate": 1.9528949950932288e-05,
"loss": 0.7699,
"step": 200
},
{
"epoch": 0.07360518180479905,
"grad_norm": 1.4046356678009033,
"learning_rate": 2.4435721295387637e-05,
"loss": 0.7252,
"step": 250
},
{
"epoch": 0.08832621816575886,
"grad_norm": 1.4438663721084595,
"learning_rate": 2.9342492639842983e-05,
"loss": 0.6996,
"step": 300
},
{
"epoch": 0.10304725452671869,
"grad_norm": 1.3257337808609009,
"learning_rate": 3.424926398429833e-05,
"loss": 0.686,
"step": 350
},
{
"epoch": 0.1177682908876785,
"grad_norm": 1.3639789819717407,
"learning_rate": 3.9156035328753685e-05,
"loss": 0.6601,
"step": 400
},
{
"epoch": 0.1324893272486383,
"grad_norm": 1.4148070812225342,
"learning_rate": 4.406280667320903e-05,
"loss": 0.6371,
"step": 450
},
{
"epoch": 0.1472103636095981,
"grad_norm": 1.3940412998199463,
"learning_rate": 4.8969578017664384e-05,
"loss": 0.6417,
"step": 500
},
{
"epoch": 0.6477255998822317,
"grad_norm": 0.6072946190834045,
"learning_rate": 9.599483839268026e-05,
"loss": 0.6122,
"step": 550
},
{
"epoch": 0.7066097453260709,
"grad_norm": 0.6030572652816772,
"learning_rate": 9.454410179022932e-05,
"loss": 0.5809,
"step": 600
},
{
"epoch": 0.7654938907699103,
"grad_norm": 0.5781008005142212,
"learning_rate": 9.288422825194501e-05,
"loss": 0.5446,
"step": 650
},
{
"epoch": 0.8243780362137495,
"grad_norm": 0.5412103533744812,
"learning_rate": 9.102301097269974e-05,
"loss": 0.5339,
"step": 700
},
{
"epoch": 0.8832621816575887,
"grad_norm": 0.5678456425666809,
"learning_rate": 8.896918846697821e-05,
"loss": 0.5296,
"step": 750
},
{
"epoch": 0.942146327101428,
"grad_norm": 0.525556206703186,
"learning_rate": 8.673240354108538e-05,
"loss": 0.5176,
"step": 800
},
{
"epoch": 1.0011776829088768,
"grad_norm": 1.9685856103897095,
"learning_rate": 8.432315801965616e-05,
"loss": 0.5104,
"step": 850
},
{
"epoch": 1.0600618283527161,
"grad_norm": 0.6006094217300415,
"learning_rate": 8.175276343902802e-05,
"loss": 0.4685,
"step": 900
},
{
"epoch": 1.1189459737965552,
"grad_norm": 0.5228903889656067,
"learning_rate": 7.903328793897418e-05,
"loss": 0.473,
"step": 950
},
{
"epoch": 1.1778301192403946,
"grad_norm": 0.5006899237632751,
"learning_rate": 7.6177499602143e-05,
"loss": 0.4679,
"step": 1000
},
{
"epoch": 1.1778301192403946,
"eval_loss": 0.4844963848590851,
"eval_runtime": 2172.4438,
"eval_samples_per_second": 1.39,
"eval_steps_per_second": 0.695,
"step": 1000
}
],
"logging_steps": 50,
"max_steps": 2547,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 1.1226707705777357e+17,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}