{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.47340779643464753,
"eval_steps": 100,
"global_step": 2000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.00023670389821732376,
"grad_norm": 2.5553174018859863,
"learning_rate": 0.001,
"loss": 10.9944,
"step": 1
},
{
"epoch": 0.023670389821732377,
"grad_norm": 0.545368492603302,
"learning_rate": 0.001,
"loss": 7.7121,
"step": 100
},
{
"epoch": 0.04734077964346475,
"grad_norm": 0.528997540473938,
"learning_rate": 0.001,
"loss": 6.3787,
"step": 200
},
{
"epoch": 0.07101116946519713,
"grad_norm": 1.7439062595367432,
"learning_rate": 0.001,
"loss": 5.9739,
"step": 300
},
{
"epoch": 0.0946815592869295,
"grad_norm": 1.6054385900497437,
"learning_rate": 0.001,
"loss": 5.7237,
"step": 400
},
{
"epoch": 0.11835194910866188,
"grad_norm": 1.6123534440994263,
"learning_rate": 0.001,
"loss": 5.5599,
"step": 500
},
{
"epoch": 0.14202233893039426,
"grad_norm": 1.2416802644729614,
"learning_rate": 0.001,
"loss": 5.4246,
"step": 600
},
{
"epoch": 0.16569272875212665,
"grad_norm": 0.967766523361206,
"learning_rate": 0.001,
"loss": 5.3145,
"step": 700
},
{
"epoch": 0.189363118573859,
"grad_norm": 1.3783754110336304,
"learning_rate": 0.001,
"loss": 5.2298,
"step": 800
},
{
"epoch": 0.2130335083955914,
"grad_norm": 1.336877703666687,
"learning_rate": 0.001,
"loss": 5.1636,
"step": 900
},
{
"epoch": 0.23670389821732377,
"grad_norm": 1.2909138202667236,
"learning_rate": 0.001,
"loss": 5.1127,
"step": 1000
},
{
"epoch": 0.26037428803905616,
"grad_norm": 0.715796172618866,
"learning_rate": 0.001,
"loss": 5.0768,
"step": 1100
},
{
"epoch": 0.2840446778607885,
"grad_norm": 1.2406651973724365,
"learning_rate": 0.001,
"loss": 5.0397,
"step": 1200
},
{
"epoch": 0.3077150676825209,
"grad_norm": 1.1353099346160889,
"learning_rate": 0.001,
"loss": 5.0067,
"step": 1300
},
{
"epoch": 0.3313854575042533,
"grad_norm": 0.5953907370567322,
"learning_rate": 0.001,
"loss": 4.9782,
"step": 1400
},
{
"epoch": 0.35505584732598566,
"grad_norm": 1.2066272497177124,
"learning_rate": 0.001,
"loss": 4.955,
"step": 1500
},
{
"epoch": 0.378726237147718,
"grad_norm": 1.0147947072982788,
"learning_rate": 0.001,
"loss": 4.937,
"step": 1600
},
{
"epoch": 0.4023966269694504,
"grad_norm": 1.634466290473938,
"learning_rate": 0.001,
"loss": 4.9147,
"step": 1700
},
{
"epoch": 0.4260670167911828,
"grad_norm": 1.3447681665420532,
"learning_rate": 0.001,
"loss": 4.9013,
"step": 1800
},
{
"epoch": 0.44973740661291517,
"grad_norm": 1.3308050632476807,
"learning_rate": 0.001,
"loss": 4.8883,
"step": 1900
},
{
"epoch": 0.47340779643464753,
"grad_norm": 1.0306942462921143,
"learning_rate": 0.001,
"loss": 4.8728,
"step": 2000
}
],
"logging_steps": 100,
"max_steps": 40000,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 2000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 8.2659866836992e+16,
"train_batch_size": 64,
"trial_name": null,
"trial_params": null
}