llama3.2_thinking_experiment / trainer_state.json
CodeIsAbstract's picture
Upload fine-tuned model
8af0cfd verified
Invalid JSON: Unexpected token 'N', ..."ad_norm": NaN, "... is not valid JSON
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.96,
"eval_steps": 10,
"global_step": 27,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.21333333333333335,
"grad_norm": null,
"learning_rate": 0.0,
"loss": 63.3011,
"step": 2
},
{
"epoch": 0.4266666666666667,
"grad_norm": null,
"learning_rate": 0.0,
"loss": 61.3878,
"step": 4
},
{
"epoch": 0.64,
"grad_norm": 611.62060546875,
"learning_rate": 2.0000000000000003e-06,
"loss": 64.6879,
"step": 6
},
{
"epoch": 0.8533333333333334,
"grad_norm": 343.85888671875,
"learning_rate": 6e-06,
"loss": 59.1893,
"step": 8
},
{
"epoch": 1.1066666666666667,
"grad_norm": 286.243408203125,
"learning_rate": 1e-05,
"loss": 61.2499,
"step": 10
},
{
"epoch": 1.1066666666666667,
"eval_loss": 3.3731160163879395,
"eval_runtime": 3.4128,
"eval_samples_per_second": 5.86,
"eval_steps_per_second": 2.051,
"step": 10
},
{
"epoch": 1.32,
"grad_norm": 289.30523681640625,
"learning_rate": 1.4e-05,
"loss": 55.2484,
"step": 12
},
{
"epoch": 1.5333333333333332,
"grad_norm": 334.54632568359375,
"learning_rate": 1.8e-05,
"loss": 56.5117,
"step": 14
},
{
"epoch": 1.7466666666666666,
"grad_norm": 328.0875549316406,
"learning_rate": 1.982973099683902e-05,
"loss": 54.6102,
"step": 16
},
{
"epoch": 1.96,
"grad_norm": 249.81207275390625,
"learning_rate": 1.8502171357296144e-05,
"loss": 55.2078,
"step": 18
},
{
"epoch": 2.2133333333333334,
"grad_norm": 240.95912170410156,
"learning_rate": 1.6026346363792565e-05,
"loss": 56.2904,
"step": 20
},
{
"epoch": 2.2133333333333334,
"eval_loss": 3.224900007247925,
"eval_runtime": 3.438,
"eval_samples_per_second": 5.817,
"eval_steps_per_second": 2.036,
"step": 20
},
{
"epoch": 2.4266666666666667,
"grad_norm": 274.64892578125,
"learning_rate": 1.2736629900720832e-05,
"loss": 52.4623,
"step": 22
},
{
"epoch": 2.64,
"grad_norm": 275.81597900390625,
"learning_rate": 9.07731640536698e-06,
"loss": 53.445,
"step": 24
},
{
"epoch": 2.8533333333333335,
"grad_norm": 252.26492309570312,
"learning_rate": 5.542616442234618e-06,
"loss": 53.8146,
"step": 26
}
],
"logging_steps": 2,
"max_steps": 27,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 10,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 3.910272351731712e+16,
"train_batch_size": 3,
"trial_name": null,
"trial_params": null
}