saepark's picture
Upload folder using huggingface_hub
9d845cb verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 1,
"global_step": 17,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.058823529411764705,
"grad_norm": 27.23904800415039,
"learning_rate": 1e-07,
"loss": 2.4516,
"step": 1
},
{
"epoch": 0.11764705882352941,
"grad_norm": 27.284263610839844,
"learning_rate": 9.91486549841951e-08,
"loss": 2.4052,
"step": 2
},
{
"epoch": 0.17647058823529413,
"grad_norm": 26.715576171875,
"learning_rate": 9.662361147021778e-08,
"loss": 2.4273,
"step": 3
},
{
"epoch": 0.23529411764705882,
"grad_norm": 27.109235763549805,
"learning_rate": 9.25108567864807e-08,
"loss": 2.372,
"step": 4
},
{
"epoch": 0.29411764705882354,
"grad_norm": 22.088886260986328,
"learning_rate": 8.695044586103295e-08,
"loss": 2.0321,
"step": 5
},
{
"epoch": 0.35294117647058826,
"grad_norm": 29.467775344848633,
"learning_rate": 8.013173181896283e-08,
"loss": 2.5105,
"step": 6
},
{
"epoch": 0.4117647058823529,
"grad_norm": 25.27092742919922,
"learning_rate": 7.228691778882692e-08,
"loss": 2.2482,
"step": 7
},
{
"epoch": 0.47058823529411764,
"grad_norm": 28.22859001159668,
"learning_rate": 6.368314950360415e-08,
"loss": 2.4815,
"step": 8
},
{
"epoch": 0.47058823529411764,
"eval_loss": 2.301804542541504,
"eval_runtime": 3.8155,
"eval_samples_per_second": 0.786,
"eval_steps_per_second": 0.262,
"step": 8
},
{
"checkpoint_runtime": 66.2871
},
{
"epoch": 0.5294117647058824,
"grad_norm": 32.29556655883789,
"learning_rate": 5.46134179731651e-08,
"loss": 2.1465,
"step": 9
},
{
"epoch": 0.5294117647058824,
"eval_loss": 2.299485921859741,
"eval_runtime": 3.8091,
"eval_samples_per_second": 0.788,
"eval_steps_per_second": 0.263,
"step": 9
},
{
"checkpoint_runtime": 70.0087
},
{
"epoch": 0.5882352941176471,
"grad_norm": 28.886428833007812,
"learning_rate": 4.5386582026834904e-08,
"loss": 2.4141,
"step": 10
},
{
"epoch": 0.5882352941176471,
"eval_loss": 2.2999422550201416,
"eval_runtime": 3.8063,
"eval_samples_per_second": 0.788,
"eval_steps_per_second": 0.263,
"step": 10
},
{
"checkpoint_runtime": 67.5608
},
{
"epoch": 0.6470588235294118,
"grad_norm": 26.31535530090332,
"learning_rate": 3.6316850496395855e-08,
"loss": 2.3997,
"step": 11
},
{
"epoch": 0.6470588235294118,
"eval_loss": 2.2996702194213867,
"eval_runtime": 3.7927,
"eval_samples_per_second": 0.791,
"eval_steps_per_second": 0.264,
"step": 11
},
{
"checkpoint_runtime": 72.5546
},
{
"epoch": 0.7058823529411765,
"grad_norm": 26.56271743774414,
"learning_rate": 2.771308221117309e-08,
"loss": 2.3832,
"step": 12
},
{
"epoch": 0.7058823529411765,
"eval_loss": 2.2992560863494873,
"eval_runtime": 3.8194,
"eval_samples_per_second": 0.785,
"eval_steps_per_second": 0.262,
"step": 12
},
{
"checkpoint_runtime": 74.4665
},
{
"epoch": 0.7647058823529411,
"grad_norm": 30.143421173095703,
"learning_rate": 1.9868268181037185e-08,
"loss": 2.5278,
"step": 13
},
{
"epoch": 0.7647058823529411,
"eval_loss": 2.2993052005767822,
"eval_runtime": 3.8917,
"eval_samples_per_second": 0.771,
"eval_steps_per_second": 0.257,
"step": 13
},
{
"checkpoint_runtime": 74.9732
},
{
"epoch": 0.8235294117647058,
"grad_norm": 27.023157119750977,
"learning_rate": 1.304955413896705e-08,
"loss": 2.346,
"step": 14
},
{
"epoch": 0.8235294117647058,
"eval_loss": 2.2990870475769043,
"eval_runtime": 4.6942,
"eval_samples_per_second": 0.639,
"eval_steps_per_second": 0.213,
"step": 14
},
{
"checkpoint_runtime": 80.5035
},
{
"epoch": 0.8823529411764706,
"grad_norm": 26.858203887939453,
"learning_rate": 7.4891432135193e-09,
"loss": 2.3549,
"step": 15
},
{
"epoch": 0.8823529411764706,
"eval_loss": 2.2999765872955322,
"eval_runtime": 6.1015,
"eval_samples_per_second": 0.492,
"eval_steps_per_second": 0.164,
"step": 15
},
{
"checkpoint_runtime": 77.296
},
{
"epoch": 0.9411764705882353,
"grad_norm": 28.46251678466797,
"learning_rate": 3.376388529782215e-09,
"loss": 2.428,
"step": 16
},
{
"epoch": 0.9411764705882353,
"eval_loss": 2.2982423305511475,
"eval_runtime": 5.4136,
"eval_samples_per_second": 0.554,
"eval_steps_per_second": 0.185,
"step": 16
},
{
"checkpoint_runtime": 86.2539
},
{
"epoch": 1.0,
"grad_norm": 29.63337516784668,
"learning_rate": 8.513450158049107e-10,
"loss": 2.4294,
"step": 17
},
{
"epoch": 1.0,
"eval_loss": 2.29897141456604,
"eval_runtime": 5.6926,
"eval_samples_per_second": 0.527,
"eval_steps_per_second": 0.176,
"step": 17
}
],
"logging_steps": 1.0,
"max_steps": 17,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 1,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.0006846020937318e+17,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}