{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"eval_steps": 500,
"global_step": 222,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.06756756756756757,
"grad_norm": 0.7969902753829956,
"learning_rate": 1.0869565217391305e-05,
"loss": 1.2661,
"step": 5
},
{
"epoch": 0.13513513513513514,
"grad_norm": 0.44543957710266113,
"learning_rate": 2.173913043478261e-05,
"loss": 1.2326,
"step": 10
},
{
"epoch": 0.20270270270270271,
"grad_norm": 0.6617331504821777,
"learning_rate": 3.260869565217392e-05,
"loss": 1.2069,
"step": 15
},
{
"epoch": 0.2702702702702703,
"grad_norm": 1.0947309732437134,
"learning_rate": 4.347826086956522e-05,
"loss": 1.2354,
"step": 20
},
{
"epoch": 0.33783783783783783,
"grad_norm": 0.6359620690345764,
"learning_rate": 4.998753972815435e-05,
"loss": 0.9942,
"step": 25
},
{
"epoch": 0.40540540540540543,
"grad_norm": 0.7191665768623352,
"learning_rate": 4.98475042744222e-05,
"loss": 1.0699,
"step": 30
},
{
"epoch": 0.47297297297297297,
"grad_norm": 0.8076795339584351,
"learning_rate": 4.955273299787451e-05,
"loss": 1.1392,
"step": 35
},
{
"epoch": 0.5405405405405406,
"grad_norm": 0.512813150882721,
"learning_rate": 4.910506156279029e-05,
"loss": 1.0548,
"step": 40
},
{
"epoch": 0.6081081081081081,
"grad_norm": 0.4570145905017853,
"learning_rate": 4.850727780681689e-05,
"loss": 0.9872,
"step": 45
},
{
"epoch": 0.6756756756756757,
"grad_norm": 1.2885775566101074,
"learning_rate": 4.7763104379936555e-05,
"loss": 1.0752,
"step": 50
},
{
"epoch": 0.7432432432432432,
"grad_norm": 0.8429825305938721,
"learning_rate": 4.6877175561964846e-05,
"loss": 1.0264,
"step": 55
},
{
"epoch": 0.8108108108108109,
"grad_norm": 0.7777895331382751,
"learning_rate": 4.585500840294794e-05,
"loss": 1.054,
"step": 60
},
{
"epoch": 0.8783783783783784,
"grad_norm": 0.5317078828811646,
"learning_rate": 4.470296836617981e-05,
"loss": 1.1165,
"step": 65
},
{
"epoch": 0.9459459459459459,
"grad_norm": 0.7687781453132629,
"learning_rate": 4.342822968779448e-05,
"loss": 1.0686,
"step": 70
},
{
"epoch": 1.0135135135135136,
"grad_norm": 1.8014172315597534,
"learning_rate": 4.203873069979085e-05,
"loss": 0.93,
"step": 75
},
{
"epoch": 1.0810810810810811,
"grad_norm": 1.656991720199585,
"learning_rate": 4.054312439471239e-05,
"loss": 0.7995,
"step": 80
},
{
"epoch": 1.1486486486486487,
"grad_norm": 0.9098888039588928,
"learning_rate": 3.8950724539836004e-05,
"loss": 0.6742,
"step": 85
},
{
"epoch": 1.2162162162162162,
"grad_norm": 0.6281040906906128,
"learning_rate": 3.727144767643984e-05,
"loss": 0.7291,
"step": 90
},
{
"epoch": 1.2837837837837838,
"grad_norm": 0.4193679690361023,
"learning_rate": 3.5515751365344605e-05,
"loss": 0.6615,
"step": 95
},
{
"epoch": 1.3513513513513513,
"grad_norm": 0.6575683951377869,
"learning_rate": 3.369456906329956e-05,
"loss": 0.7908,
"step": 100
},
{
"epoch": 1.4189189189189189,
"grad_norm": 0.712852418422699,
"learning_rate": 3.181924203576508e-05,
"loss": 0.7457,
"step": 105
},
{
"epoch": 1.4864864864864864,
"grad_norm": 0.8350839018821716,
"learning_rate": 2.990144873009946e-05,
"loss": 0.7112,
"step": 110
},
{
"epoch": 1.554054054054054,
"grad_norm": 0.7229794263839722,
"learning_rate": 2.7953132048972703e-05,
"loss": 0.6355,
"step": 115
},
{
"epoch": 1.6216216216216215,
"grad_norm": 0.7980140447616577,
"learning_rate": 2.5986424976906322e-05,
"loss": 0.7024,
"step": 120
},
{
"epoch": 1.689189189189189,
"grad_norm": 0.738429605960846,
"learning_rate": 2.4013575023093684e-05,
"loss": 0.6657,
"step": 125
},
{
"epoch": 1.7567567567567568,
"grad_norm": 0.9233659505844116,
"learning_rate": 2.2046867951027303e-05,
"loss": 0.7342,
"step": 130
},
{
"epoch": 1.8243243243243243,
"grad_norm": 1.2013847827911377,
"learning_rate": 2.009855126990055e-05,
"loss": 0.7341,
"step": 135
},
{
"epoch": 1.8918918918918919,
"grad_norm": 0.5938061475753784,
"learning_rate": 1.8180757964234924e-05,
"loss": 0.7216,
"step": 140
},
{
"epoch": 1.9594594594594594,
"grad_norm": 1.7424287796020508,
"learning_rate": 1.630543093670044e-05,
"loss": 0.6582,
"step": 145
},
{
"epoch": 2.027027027027027,
"grad_norm": 0.810746967792511,
"learning_rate": 1.4484248634655401e-05,
"loss": 0.556,
"step": 150
},
{
"epoch": 2.0945945945945947,
"grad_norm": 0.9153107404708862,
"learning_rate": 1.2728552323560161e-05,
"loss": 0.4376,
"step": 155
},
{
"epoch": 2.1621621621621623,
"grad_norm": 1.0982611179351807,
"learning_rate": 1.1049275460163999e-05,
"loss": 0.4013,
"step": 160
},
{
"epoch": 2.22972972972973,
"grad_norm": 0.7413326501846313,
"learning_rate": 9.456875605287624e-06,
"loss": 0.3845,
"step": 165
},
{
"epoch": 2.2972972972972974,
"grad_norm": 0.8262110352516174,
"learning_rate": 7.961269300209159e-06,
"loss": 0.4343,
"step": 170
},
{
"epoch": 2.364864864864865,
"grad_norm": 0.6822178363800049,
"learning_rate": 6.57177031220553e-06,
"loss": 0.4385,
"step": 175
},
{
"epoch": 2.4324324324324325,
"grad_norm": 1.367874026298523,
"learning_rate": 5.297031633820193e-06,
"loss": 0.3999,
"step": 180
},
{
"epoch": 2.5,
"grad_norm": 0.5967473983764648,
"learning_rate": 4.144991597052065e-06,
"loss": 0.4239,
"step": 185
},
{
"epoch": 2.5675675675675675,
"grad_norm": 1.1111470460891724,
"learning_rate": 3.1228244380351602e-06,
"loss": 0.5347,
"step": 190
},
{
"epoch": 2.635135135135135,
"grad_norm": 0.8339570760726929,
"learning_rate": 2.236895620063445e-06,
"loss": 0.3577,
"step": 195
},
{
"epoch": 2.7027027027027026,
"grad_norm": 0.9222090840339661,
"learning_rate": 1.4927221931831131e-06,
"loss": 0.2796,
"step": 200
},
{
"epoch": 2.77027027027027,
"grad_norm": 1.083082914352417,
"learning_rate": 8.949384372097081e-07,
"loss": 0.3745,
"step": 205
},
{
"epoch": 2.8378378378378377,
"grad_norm": 0.9275721311569214,
"learning_rate": 4.472670021254899e-07,
"loss": 0.488,
"step": 210
},
{
"epoch": 2.9054054054054053,
"grad_norm": 0.7282541394233704,
"learning_rate": 1.5249572557780588e-07,
"loss": 0.4765,
"step": 215
},
{
"epoch": 2.972972972972973,
"grad_norm": 0.6620603203773499,
"learning_rate": 1.2460271845654569e-08,
"loss": 0.4063,
"step": 220
},
{
"epoch": 3.0,
"step": 222,
"total_flos": 1.741913829015552e+16,
"train_loss": 0.7424485769357767,
"train_runtime": 2114.3102,
"train_samples_per_second": 0.839,
"train_steps_per_second": 0.105
}
],
"logging_steps": 5,
"max_steps": 222,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 1000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.741913829015552e+16,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}