File size: 2,516 Bytes
b4b5923 a34d7ae b4b5923 a34d7ae 3ee2afb a34d7ae b4b5923 a34d7ae b4b5923 a34d7ae 553c12f a34d7ae 553c12f a34d7ae 553c12f a34d7ae 3ee2afb a34d7ae 3ee2afb a34d7ae 3ee2afb a34d7ae 3ee2afb a34d7ae 3ee2afb a34d7ae b4b5923 a34d7ae b4b5923 a34d7ae b4b5923 a34d7ae b4b5923 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 | {
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.6,
"eval_steps": 5,
"global_step": 20,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.08,
"grad_norm": 0.7405178384640212,
"learning_rate": 0.0001,
"loss": 1.0732,
"step": 1
},
{
"epoch": 0.4,
"grad_norm": 0.8403334341079279,
"learning_rate": 0.00018660254037844388,
"loss": 0.865,
"step": 5
},
{
"epoch": 0.4,
"eval_loss": 0.7255128622055054,
"eval_runtime": 3.1356,
"eval_samples_per_second": 6.378,
"eval_steps_per_second": 1.595,
"step": 5
},
{
"epoch": 0.8,
"grad_norm": 0.46504703007968234,
"learning_rate": 0.00011736481776669306,
"loss": 0.6956,
"step": 10
},
{
"epoch": 0.8,
"eval_loss": 0.630705714225769,
"eval_runtime": 1.9072,
"eval_samples_per_second": 10.486,
"eval_steps_per_second": 2.622,
"step": 10
},
{
"epoch": 1.2,
"grad_norm": 0.41977051641880375,
"learning_rate": 3.5721239031346066e-05,
"loss": 0.5421,
"step": 15
},
{
"epoch": 1.2,
"eval_loss": 0.6072101593017578,
"eval_runtime": 1.9054,
"eval_samples_per_second": 10.497,
"eval_steps_per_second": 2.624,
"step": 15
},
{
"epoch": 1.6,
"grad_norm": 0.42132802092200616,
"learning_rate": 0.0,
"loss": 0.5015,
"step": 20
},
{
"epoch": 1.6,
"eval_loss": 0.6025974750518799,
"eval_runtime": 1.8831,
"eval_samples_per_second": 10.621,
"eval_steps_per_second": 2.655,
"step": 20
},
{
"epoch": 1.6,
"step": 20,
"total_flos": 1368128028672.0,
"train_loss": 0.66145840883255,
"train_runtime": 82.9739,
"train_samples_per_second": 1.928,
"train_steps_per_second": 0.241
}
],
"logging_steps": 5,
"max_steps": 20,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 1000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1368128028672.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}
|