DigiSapien1.0 / checkpoint-100 /trainer_state.json
jshaigler's picture
Upload folder using huggingface_hub
3a6b2bc verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.0010985117911509383,
"eval_steps": 500,
"global_step": 100,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.00010985117911509383,
"grad_norm": 2.085447311401367,
"learning_rate": 0.0001936842105263158,
"loss": 3.2352,
"step": 10
},
{
"epoch": 0.00021970235823018765,
"grad_norm": 1.401716947555542,
"learning_rate": 0.00017473684210526317,
"loss": 1.3128,
"step": 20
},
{
"epoch": 0.0003295535373452815,
"grad_norm": 0.77020263671875,
"learning_rate": 0.0001536842105263158,
"loss": 0.8921,
"step": 30
},
{
"epoch": 0.0004394047164603753,
"grad_norm": 0.7875494956970215,
"learning_rate": 0.00013263157894736842,
"loss": 0.8159,
"step": 40
},
{
"epoch": 0.0005492558955754692,
"grad_norm": 0.502890944480896,
"learning_rate": 0.00011157894736842105,
"loss": 0.7328,
"step": 50
},
{
"epoch": 0.000659107074690563,
"grad_norm": 0.42759764194488525,
"learning_rate": 9.052631578947369e-05,
"loss": 0.7261,
"step": 60
},
{
"epoch": 0.0007689582538056568,
"grad_norm": 0.309189110994339,
"learning_rate": 6.947368421052632e-05,
"loss": 0.7057,
"step": 70
},
{
"epoch": 0.0008788094329207506,
"grad_norm": 0.31502893567085266,
"learning_rate": 4.842105263157895e-05,
"loss": 0.7072,
"step": 80
},
{
"epoch": 0.0009886606120358445,
"grad_norm": 0.24574220180511475,
"learning_rate": 2.7368421052631583e-05,
"loss": 0.7098,
"step": 90
},
{
"epoch": 0.0010985117911509383,
"grad_norm": 0.23801881074905396,
"learning_rate": 6.315789473684211e-06,
"loss": 0.6984,
"step": 100
}
],
"logging_steps": 10,
"max_steps": 100,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 4021786178666496.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}