File size: 2,245 Bytes
16f03df 1fc0701 16f03df 1fc0701 16f03df 1fc0701 16f03df 1fc0701 16f03df 1fc0701 16f03df 1fc0701 16f03df 1fc0701 16f03df 1fc0701 16f03df 1fc0701 16f03df 1fc0701 16f03df 1fc0701 16f03df 1fc0701 16f03df 1fc0701 16f03df 1fc0701 16f03df 1fc0701 16f03df |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 |
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 9.966777408637874,
"eval_steps": 1000,
"global_step": 3000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"learning_rate": 9.966777408637875e-07,
"loss": 2.3466,
"step": 300
},
{
"epoch": 1.99,
"learning_rate": 1.993355481727575e-06,
"loss": 2.3107,
"step": 600
},
{
"epoch": 2.99,
"learning_rate": 2.9900332225913626e-06,
"loss": 2.2553,
"step": 900
},
{
"epoch": 3.32,
"eval_loss": 2.210308074951172,
"eval_runtime": 21.2106,
"eval_samples_per_second": 80.007,
"eval_steps_per_second": 5.045,
"step": 1000
},
{
"epoch": 3.99,
"learning_rate": 3.98671096345515e-06,
"loss": 2.2071,
"step": 1200
},
{
"epoch": 4.98,
"learning_rate": 4.983388704318937e-06,
"loss": 2.1719,
"step": 1500
},
{
"epoch": 5.98,
"learning_rate": 5.980066445182725e-06,
"loss": 2.1433,
"step": 1800
},
{
"epoch": 6.64,
"eval_loss": 2.1161131858825684,
"eval_runtime": 21.3287,
"eval_samples_per_second": 79.564,
"eval_steps_per_second": 5.017,
"step": 2000
},
{
"epoch": 6.98,
"learning_rate": 6.976744186046513e-06,
"loss": 2.1165,
"step": 2100
},
{
"epoch": 7.97,
"learning_rate": 7.9734219269103e-06,
"loss": 2.0987,
"step": 2400
},
{
"epoch": 8.97,
"learning_rate": 8.970099667774087e-06,
"loss": 2.0829,
"step": 2700
},
{
"epoch": 9.97,
"learning_rate": 9.966777408637874e-06,
"loss": 2.0627,
"step": 3000
},
{
"epoch": 9.97,
"eval_loss": 2.0649547576904297,
"eval_runtime": 21.2155,
"eval_samples_per_second": 79.989,
"eval_steps_per_second": 5.043,
"step": 3000
}
],
"logging_steps": 300,
"max_steps": 6020,
"num_train_epochs": 20,
"save_steps": 1000,
"total_flos": 1.5499830660395827e+17,
"trial_name": null,
"trial_params": null
}
|