therapy-llama-adapter / trainer_state.json
Bibek-Mukherjee's picture
Initial adapter upload
1f00b16 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9389671361502347,
"eval_steps": 500,
"global_step": 200,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.05,
"learning_rate": 0.0002,
"loss": 2.5379,
"step": 10
},
{
"epoch": 0.09,
"learning_rate": 0.00019014778325123153,
"loss": 2.1519,
"step": 20
},
{
"epoch": 0.14,
"learning_rate": 0.00018029556650246307,
"loss": 1.8117,
"step": 30
},
{
"epoch": 0.19,
"learning_rate": 0.0001704433497536946,
"loss": 1.6147,
"step": 40
},
{
"epoch": 0.23,
"learning_rate": 0.00016059113300492613,
"loss": 1.5671,
"step": 50
},
{
"epoch": 0.28,
"learning_rate": 0.00015073891625615765,
"loss": 1.5016,
"step": 60
},
{
"epoch": 0.33,
"learning_rate": 0.00014088669950738917,
"loss": 1.454,
"step": 70
},
{
"epoch": 0.38,
"learning_rate": 0.00013103448275862068,
"loss": 1.4734,
"step": 80
},
{
"epoch": 0.42,
"learning_rate": 0.00012118226600985223,
"loss": 1.4959,
"step": 90
},
{
"epoch": 0.47,
"learning_rate": 0.00011133004926108374,
"loss": 1.4655,
"step": 100
},
{
"epoch": 0.52,
"learning_rate": 0.00010147783251231529,
"loss": 1.4204,
"step": 110
},
{
"epoch": 0.56,
"learning_rate": 9.16256157635468e-05,
"loss": 1.3835,
"step": 120
},
{
"epoch": 0.61,
"learning_rate": 8.177339901477834e-05,
"loss": 1.4424,
"step": 130
},
{
"epoch": 0.66,
"learning_rate": 7.192118226600985e-05,
"loss": 1.4117,
"step": 140
},
{
"epoch": 0.7,
"learning_rate": 6.206896551724138e-05,
"loss": 1.445,
"step": 150
},
{
"epoch": 0.75,
"learning_rate": 5.2216748768472914e-05,
"loss": 1.428,
"step": 160
},
{
"epoch": 0.8,
"learning_rate": 4.236453201970443e-05,
"loss": 1.3874,
"step": 170
},
{
"epoch": 0.85,
"learning_rate": 3.251231527093596e-05,
"loss": 1.4699,
"step": 180
},
{
"epoch": 0.89,
"learning_rate": 2.266009852216749e-05,
"loss": 1.4248,
"step": 190
},
{
"epoch": 0.94,
"learning_rate": 1.2807881773399016e-05,
"loss": 1.4369,
"step": 200
}
],
"logging_steps": 10,
"max_steps": 213,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"total_flos": 3.24970513170432e+16,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}