untitled-7B / trainer_state.json
“pharaouk”
a
0da118c
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.006615506747816882,
"eval_steps": 756,
"global_step": 50,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 0.0,
"loss": 0.929,
"step": 1
},
{
"epoch": 0.0,
"eval_loss": 0.9224275946617126,
"eval_runtime": 2.17,
"eval_samples_per_second": 79.724,
"eval_steps_per_second": 3.687,
"step": 1
},
{
"epoch": 0.0,
"eval_bench_accuracy_agieval": 0.288135593220339,
"eval_bench_accuracy_arc_challenge": 0.8148148148148148,
"eval_bench_accuracy_arc_easy": 0.9074074074074074,
"eval_bench_accuracy_bigbench": 0.3442622950819672,
"eval_bench_accuracy_boolq": 0.5185185185185185,
"eval_bench_accuracy_mmlu": 0.48148148148148145,
"eval_bench_accuracy_openbookqa": 0.14814814814814814,
"eval_bench_accuracy_truthful_qa": 0.37735849056603776,
"eval_bench_accuracy_winogrande": 0.4074074074074074,
"eval_bench_average_accuracy": 0.4763926840717912,
"eval_bench_loss": 5.786159653261484,
"eval_bench_total_accuracy": 0.47283702213279677,
"step": 1
},
{
"epoch": 0.0,
"learning_rate": 6.000000000000001e-07,
"loss": 0.8533,
"step": 2
},
{
"epoch": 0.0,
"learning_rate": 1.2000000000000002e-06,
"loss": 0.9641,
"step": 3
},
{
"epoch": 0.0,
"learning_rate": 1.8e-06,
"loss": 0.8488,
"step": 4
},
{
"epoch": 0.0,
"learning_rate": 2.4000000000000003e-06,
"loss": 0.8863,
"step": 5
},
{
"epoch": 0.0,
"learning_rate": 3e-06,
"loss": 0.7988,
"step": 6
},
{
"epoch": 0.0,
"learning_rate": 3.6e-06,
"loss": 0.7789,
"step": 7
},
{
"epoch": 0.0,
"learning_rate": 4.2e-06,
"loss": 0.7144,
"step": 8
},
{
"epoch": 0.0,
"learning_rate": 4.800000000000001e-06,
"loss": 0.8322,
"step": 9
},
{
"epoch": 0.0,
"learning_rate": 5.4e-06,
"loss": 0.734,
"step": 10
},
{
"epoch": 0.0,
"learning_rate": 6e-06,
"loss": 0.7861,
"step": 11
},
{
"epoch": 0.0,
"learning_rate": 5.999602806831722e-06,
"loss": 0.6733,
"step": 12
},
{
"epoch": 0.0,
"learning_rate": 5.999205613663445e-06,
"loss": 0.7019,
"step": 13
},
{
"epoch": 0.0,
"learning_rate": 5.9988084204951675e-06,
"loss": 0.7096,
"step": 14
},
{
"epoch": 0.0,
"learning_rate": 5.99841122732689e-06,
"loss": 0.6745,
"step": 15
},
{
"epoch": 0.0,
"learning_rate": 5.998014034158613e-06,
"loss": 0.8022,
"step": 16
},
{
"epoch": 0.0,
"learning_rate": 5.997616840990336e-06,
"loss": 0.7753,
"step": 17
},
{
"epoch": 0.0,
"learning_rate": 5.997219647822058e-06,
"loss": 0.6939,
"step": 18
},
{
"epoch": 0.0,
"learning_rate": 5.99682245465378e-06,
"loss": 0.689,
"step": 19
},
{
"epoch": 0.0,
"learning_rate": 5.996425261485502e-06,
"loss": 0.7419,
"step": 20
},
{
"epoch": 0.0,
"learning_rate": 5.996028068317225e-06,
"loss": 0.6975,
"step": 21
},
{
"epoch": 0.0,
"learning_rate": 5.9956308751489475e-06,
"loss": 0.686,
"step": 22
},
{
"epoch": 0.0,
"learning_rate": 5.99523368198067e-06,
"loss": 0.7576,
"step": 23
},
{
"epoch": 0.0,
"learning_rate": 5.994836488812393e-06,
"loss": 0.6802,
"step": 24
},
{
"epoch": 0.0,
"learning_rate": 5.994439295644115e-06,
"loss": 0.711,
"step": 25
},
{
"epoch": 0.0,
"learning_rate": 5.994042102475838e-06,
"loss": 0.6658,
"step": 26
},
{
"epoch": 0.0,
"learning_rate": 5.99364490930756e-06,
"loss": 0.685,
"step": 27
},
{
"epoch": 0.0,
"learning_rate": 5.993247716139283e-06,
"loss": 0.6881,
"step": 28
},
{
"epoch": 0.0,
"learning_rate": 5.992850522971005e-06,
"loss": 0.7066,
"step": 29
},
{
"epoch": 0.0,
"learning_rate": 5.992453329802727e-06,
"loss": 0.6993,
"step": 30
},
{
"epoch": 0.0,
"learning_rate": 5.99205613663445e-06,
"loss": 0.6429,
"step": 31
},
{
"epoch": 0.0,
"learning_rate": 5.991658943466173e-06,
"loss": 0.7205,
"step": 32
},
{
"epoch": 0.0,
"learning_rate": 5.991261750297895e-06,
"loss": 0.703,
"step": 33
},
{
"epoch": 0.0,
"learning_rate": 5.990864557129617e-06,
"loss": 0.6512,
"step": 34
},
{
"epoch": 0.0,
"learning_rate": 5.99046736396134e-06,
"loss": 0.6583,
"step": 35
},
{
"epoch": 0.0,
"learning_rate": 5.990070170793063e-06,
"loss": 0.6904,
"step": 36
},
{
"epoch": 0.0,
"learning_rate": 5.989672977624785e-06,
"loss": 0.6619,
"step": 37
},
{
"epoch": 0.01,
"learning_rate": 5.989275784456507e-06,
"loss": 0.7033,
"step": 38
},
{
"epoch": 0.01,
"learning_rate": 5.98887859128823e-06,
"loss": 0.6522,
"step": 39
},
{
"epoch": 0.01,
"learning_rate": 5.9884813981199526e-06,
"loss": 0.6365,
"step": 40
},
{
"epoch": 0.01,
"learning_rate": 5.988084204951675e-06,
"loss": 0.6422,
"step": 41
},
{
"epoch": 0.01,
"learning_rate": 5.987687011783397e-06,
"loss": 0.625,
"step": 42
},
{
"epoch": 0.01,
"learning_rate": 5.98728981861512e-06,
"loss": 0.6148,
"step": 43
},
{
"epoch": 0.01,
"learning_rate": 5.986892625446843e-06,
"loss": 0.7089,
"step": 44
},
{
"epoch": 0.01,
"learning_rate": 5.986495432278565e-06,
"loss": 0.681,
"step": 45
},
{
"epoch": 0.01,
"learning_rate": 5.986098239110287e-06,
"loss": 0.6505,
"step": 46
},
{
"epoch": 0.01,
"learning_rate": 5.98570104594201e-06,
"loss": 0.6531,
"step": 47
},
{
"epoch": 0.01,
"learning_rate": 5.9853038527737325e-06,
"loss": 0.6683,
"step": 48
},
{
"epoch": 0.01,
"learning_rate": 5.984906659605455e-06,
"loss": 0.6967,
"step": 49
},
{
"epoch": 0.01,
"learning_rate": 5.984509466437178e-06,
"loss": 0.6514,
"step": 50
}
],
"logging_steps": 1,
"max_steps": 15116,
"num_train_epochs": 2,
"save_steps": 50,
"total_flos": 2.097021406150656e+17,
"trial_name": null,
"trial_params": null
}