A-soldier-Memory / trainer_state.json
AmritJain's picture
Upload trained LoRA adapter
8f2374f verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"eval_steps": 100,
"global_step": 90,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.1694915254237288,
"grad_norm": 1.3848826885223389,
"learning_rate": 0.0002985402103112355,
"loss": 2.723,
"num_input_tokens_seen": 175104,
"step": 5,
"train_runtime": 83.0375,
"train_tokens_per_second": 2108.734
},
{
"epoch": 0.3389830508474576,
"grad_norm": 0.6895682215690613,
"learning_rate": 0.00029265847744427303,
"loss": 2.3916,
"num_input_tokens_seen": 354944,
"step": 10,
"train_runtime": 164.5936,
"train_tokens_per_second": 2156.487
},
{
"epoch": 0.5084745762711864,
"grad_norm": 0.6077523827552795,
"learning_rate": 0.000282442138928839,
"loss": 2.2298,
"num_input_tokens_seen": 540288,
"step": 15,
"train_runtime": 235.3105,
"train_tokens_per_second": 2296.064
},
{
"epoch": 0.6779661016949152,
"grad_norm": 0.6539971828460693,
"learning_rate": 0.00026820161304100823,
"loss": 2.1544,
"num_input_tokens_seen": 724032,
"step": 20,
"train_runtime": 314.4139,
"train_tokens_per_second": 2302.799
},
{
"epoch": 0.847457627118644,
"grad_norm": 0.5982179045677185,
"learning_rate": 0.0002503695909538287,
"loss": 2.093,
"num_input_tokens_seen": 915328,
"step": 25,
"train_runtime": 621.4047,
"train_tokens_per_second": 1472.998
},
{
"epoch": 1.0,
"grad_norm": 0.7921583652496338,
"learning_rate": 0.0002294878896349807,
"loss": 1.9914,
"num_input_tokens_seen": 1076448,
"step": 30,
"train_runtime": 945.877,
"train_tokens_per_second": 1138.042
},
{
"epoch": 1.1694915254237288,
"grad_norm": 0.6233165264129639,
"learning_rate": 0.0002061909890123868,
"loss": 1.8562,
"num_input_tokens_seen": 1255264,
"step": 35,
"train_runtime": 1311.2231,
"train_tokens_per_second": 957.323
},
{
"epoch": 1.3389830508474576,
"grad_norm": 0.6240576505661011,
"learning_rate": 0.00018118675362266385,
"loss": 1.8674,
"num_input_tokens_seen": 1441760,
"step": 40,
"train_runtime": 1702.9404,
"train_tokens_per_second": 846.63
},
{
"epoch": 1.5084745762711864,
"grad_norm": 0.5960806608200073,
"learning_rate": 0.00015523492450537517,
"loss": 1.8146,
"num_input_tokens_seen": 1629792,
"step": 45,
"train_runtime": 2106.6678,
"train_tokens_per_second": 773.635
},
{
"epoch": 1.6779661016949152,
"grad_norm": 0.6159402132034302,
"learning_rate": 0.0001291240348559902,
"loss": 1.7993,
"num_input_tokens_seen": 1810016,
"step": 50,
"train_runtime": 2464.8114,
"train_tokens_per_second": 734.343
},
{
"epoch": 1.847457627118644,
"grad_norm": 0.6029852628707886,
"learning_rate": 0.0001036474508437579,
"loss": 1.7685,
"num_input_tokens_seen": 1995104,
"step": 55,
"train_runtime": 2847.5911,
"train_tokens_per_second": 700.629
},
{
"epoch": 2.0,
"grad_norm": 0.8139386773109436,
"learning_rate": 7.957926558211642e-05,
"loss": 1.7569,
"num_input_tokens_seen": 2155824,
"step": 60,
"train_runtime": 3157.3683,
"train_tokens_per_second": 682.791
},
{
"epoch": 2.169491525423729,
"grad_norm": 0.603970468044281,
"learning_rate": 5.765077870115125e-05,
"loss": 1.6611,
"num_input_tokens_seen": 2341104,
"step": 65,
"train_runtime": 3543.124,
"train_tokens_per_second": 660.746
},
{
"epoch": 2.3389830508474576,
"grad_norm": 0.6044986248016357,
"learning_rate": 3.852827617839084e-05,
"loss": 1.6719,
"num_input_tokens_seen": 2521200,
"step": 70,
"train_runtime": 3896.3924,
"train_tokens_per_second": 647.06
},
{
"epoch": 2.5084745762711864,
"grad_norm": 0.5742617249488831,
"learning_rate": 2.2792785576536105e-05,
"loss": 1.6857,
"num_input_tokens_seen": 2709040,
"step": 75,
"train_runtime": 4297.8213,
"train_tokens_per_second": 630.329
},
{
"epoch": 2.6779661016949152,
"grad_norm": 0.5705291628837585,
"learning_rate": 1.0922421814981901e-05,
"loss": 1.6768,
"num_input_tokens_seen": 2896048,
"step": 80,
"train_runtime": 4682.8232,
"train_tokens_per_second": 618.441
},
{
"epoch": 2.847457627118644,
"grad_norm": 0.5796510577201843,
"learning_rate": 3.2778598899291465e-06,
"loss": 1.628,
"num_input_tokens_seen": 3073200,
"step": 85,
"train_runtime": 5015.5353,
"train_tokens_per_second": 612.736
},
{
"epoch": 3.0,
"grad_norm": 0.8749147057533264,
"learning_rate": 9.137594713563568e-08,
"loss": 1.6688,
"num_input_tokens_seen": 3234144,
"step": 90,
"train_runtime": 5323.9438,
"train_tokens_per_second": 607.471
},
{
"epoch": 3.0,
"num_input_tokens_seen": 3234144,
"step": 90,
"total_flos": 5179803374518272.0,
"train_loss": 1.9132584571838378,
"train_runtime": 5325.3956,
"train_samples_per_second": 1.062,
"train_steps_per_second": 0.017
}
],
"logging_steps": 5,
"max_steps": 90,
"num_input_tokens_seen": 3234144,
"num_train_epochs": 3,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 5179803374518272.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}