testingPyScript / checkpoint-120 /trainer_state.json
andrk9's picture
Upload folder using huggingface_hub
e2ca488 verified
{
"best_metric": 1.6305923461914062,
"best_model_checkpoint": "/scratch/kwamea/llama-output/checkpoint-120",
"epoch": 8.571428571428571,
"eval_steps": 5,
"global_step": 120,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.36,
"learning_rate": 9.642857142857143e-05,
"loss": 2.2959,
"step": 5
},
{
"epoch": 0.36,
"eval_loss": 2.26009202003479,
"eval_runtime": 2.5228,
"eval_samples_per_second": 2.378,
"eval_steps_per_second": 0.396,
"step": 5
},
{
"epoch": 0.71,
"learning_rate": 9.285714285714286e-05,
"loss": 2.2056,
"step": 10
},
{
"epoch": 0.71,
"eval_loss": 2.15522837638855,
"eval_runtime": 2.5338,
"eval_samples_per_second": 2.368,
"eval_steps_per_second": 0.395,
"step": 10
},
{
"epoch": 1.07,
"learning_rate": 8.92857142857143e-05,
"loss": 2.097,
"step": 15
},
{
"epoch": 1.07,
"eval_loss": 2.0667991638183594,
"eval_runtime": 2.5411,
"eval_samples_per_second": 2.361,
"eval_steps_per_second": 0.394,
"step": 15
},
{
"epoch": 1.43,
"learning_rate": 8.571428571428571e-05,
"loss": 2.0293,
"step": 20
},
{
"epoch": 1.43,
"eval_loss": 2.0328972339630127,
"eval_runtime": 2.5419,
"eval_samples_per_second": 2.36,
"eval_steps_per_second": 0.393,
"step": 20
},
{
"epoch": 1.79,
"learning_rate": 8.214285714285714e-05,
"loss": 2.0228,
"step": 25
},
{
"epoch": 1.79,
"eval_loss": 1.998112678527832,
"eval_runtime": 2.5416,
"eval_samples_per_second": 2.361,
"eval_steps_per_second": 0.393,
"step": 25
},
{
"epoch": 2.14,
"learning_rate": 7.857142857142858e-05,
"loss": 1.9493,
"step": 30
},
{
"epoch": 2.14,
"eval_loss": 1.968154788017273,
"eval_runtime": 2.5414,
"eval_samples_per_second": 2.361,
"eval_steps_per_second": 0.393,
"step": 30
},
{
"epoch": 2.5,
"learning_rate": 7.500000000000001e-05,
"loss": 1.9252,
"step": 35
},
{
"epoch": 2.5,
"eval_loss": 1.937127947807312,
"eval_runtime": 2.5401,
"eval_samples_per_second": 2.362,
"eval_steps_per_second": 0.394,
"step": 35
},
{
"epoch": 2.86,
"learning_rate": 7.142857142857143e-05,
"loss": 1.8848,
"step": 40
},
{
"epoch": 2.86,
"eval_loss": 1.9035807847976685,
"eval_runtime": 2.5391,
"eval_samples_per_second": 2.363,
"eval_steps_per_second": 0.394,
"step": 40
},
{
"epoch": 3.21,
"learning_rate": 6.785714285714286e-05,
"loss": 1.8708,
"step": 45
},
{
"epoch": 3.21,
"eval_loss": 1.8712326288223267,
"eval_runtime": 2.5413,
"eval_samples_per_second": 2.361,
"eval_steps_per_second": 0.394,
"step": 45
},
{
"epoch": 3.57,
"learning_rate": 6.428571428571429e-05,
"loss": 1.795,
"step": 50
},
{
"epoch": 3.57,
"eval_loss": 1.8411849737167358,
"eval_runtime": 2.5425,
"eval_samples_per_second": 2.36,
"eval_steps_per_second": 0.393,
"step": 50
},
{
"epoch": 3.93,
"learning_rate": 6.0714285714285715e-05,
"loss": 1.7851,
"step": 55
},
{
"epoch": 3.93,
"eval_loss": 1.8131023645401,
"eval_runtime": 2.5418,
"eval_samples_per_second": 2.36,
"eval_steps_per_second": 0.393,
"step": 55
},
{
"epoch": 4.29,
"learning_rate": 5.714285714285714e-05,
"loss": 1.737,
"step": 60
},
{
"epoch": 4.29,
"eval_loss": 1.7883529663085938,
"eval_runtime": 2.541,
"eval_samples_per_second": 2.361,
"eval_steps_per_second": 0.394,
"step": 60
},
{
"epoch": 4.64,
"learning_rate": 5.3571428571428575e-05,
"loss": 1.7509,
"step": 65
},
{
"epoch": 4.64,
"eval_loss": 1.7668665647506714,
"eval_runtime": 2.5422,
"eval_samples_per_second": 2.36,
"eval_steps_per_second": 0.393,
"step": 65
},
{
"epoch": 5.0,
"learning_rate": 5e-05,
"loss": 1.7,
"step": 70
},
{
"epoch": 5.0,
"eval_loss": 1.7516651153564453,
"eval_runtime": 2.5412,
"eval_samples_per_second": 2.361,
"eval_steps_per_second": 0.394,
"step": 70
},
{
"epoch": 5.36,
"learning_rate": 4.642857142857143e-05,
"loss": 1.6734,
"step": 75
},
{
"epoch": 5.36,
"eval_loss": 1.7334843873977661,
"eval_runtime": 2.5431,
"eval_samples_per_second": 2.359,
"eval_steps_per_second": 0.393,
"step": 75
},
{
"epoch": 5.71,
"learning_rate": 4.2857142857142856e-05,
"loss": 1.6719,
"step": 80
},
{
"epoch": 5.71,
"eval_loss": 1.7061794996261597,
"eval_runtime": 2.5412,
"eval_samples_per_second": 2.361,
"eval_steps_per_second": 0.394,
"step": 80
},
{
"epoch": 6.07,
"learning_rate": 3.928571428571429e-05,
"loss": 1.5932,
"step": 85
},
{
"epoch": 6.07,
"eval_loss": 1.6727076768875122,
"eval_runtime": 2.5367,
"eval_samples_per_second": 2.365,
"eval_steps_per_second": 0.394,
"step": 85
},
{
"epoch": 6.43,
"learning_rate": 3.571428571428572e-05,
"loss": 1.5866,
"step": 90
},
{
"epoch": 6.43,
"eval_loss": 1.66474187374115,
"eval_runtime": 2.5383,
"eval_samples_per_second": 2.364,
"eval_steps_per_second": 0.394,
"step": 90
},
{
"epoch": 6.79,
"learning_rate": 3.2142857142857144e-05,
"loss": 1.577,
"step": 95
},
{
"epoch": 6.79,
"eval_loss": 1.6590815782546997,
"eval_runtime": 2.5384,
"eval_samples_per_second": 2.364,
"eval_steps_per_second": 0.394,
"step": 95
},
{
"epoch": 7.14,
"learning_rate": 2.857142857142857e-05,
"loss": 1.5532,
"step": 100
},
{
"epoch": 7.14,
"eval_loss": 1.6509045362472534,
"eval_runtime": 2.5429,
"eval_samples_per_second": 2.359,
"eval_steps_per_second": 0.393,
"step": 100
},
{
"epoch": 7.5,
"learning_rate": 2.5e-05,
"loss": 1.5099,
"step": 105
},
{
"epoch": 7.5,
"eval_loss": 1.6463295221328735,
"eval_runtime": 2.5379,
"eval_samples_per_second": 2.364,
"eval_steps_per_second": 0.394,
"step": 105
},
{
"epoch": 7.86,
"learning_rate": 2.1428571428571428e-05,
"loss": 1.5717,
"step": 110
},
{
"epoch": 7.86,
"eval_loss": 1.6409095525741577,
"eval_runtime": 2.5359,
"eval_samples_per_second": 2.366,
"eval_steps_per_second": 0.394,
"step": 110
},
{
"epoch": 8.21,
"learning_rate": 1.785714285714286e-05,
"loss": 1.5354,
"step": 115
},
{
"epoch": 8.21,
"eval_loss": 1.6344412565231323,
"eval_runtime": 2.5372,
"eval_samples_per_second": 2.365,
"eval_steps_per_second": 0.394,
"step": 115
},
{
"epoch": 8.57,
"learning_rate": 1.4285714285714285e-05,
"loss": 1.5127,
"step": 120
},
{
"epoch": 8.57,
"eval_loss": 1.6305923461914062,
"eval_runtime": 2.537,
"eval_samples_per_second": 2.365,
"eval_steps_per_second": 0.394,
"step": 120
}
],
"logging_steps": 5,
"max_steps": 140,
"num_train_epochs": 10,
"save_steps": 10,
"total_flos": 3.89964374212608e+16,
"trial_name": null,
"trial_params": null
}