{
"best_metric": 0.002846540417522192,
"best_model_checkpoint": "./xlam_3epoch_fix_tool/checkpoint-343",
"epoch": 2.982127659574468,
"eval_steps": 49,
"global_step": 438,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.06808510638297872,
"grad_norm": 0.11843034625053406,
"learning_rate": 9.090909090909091e-06,
"loss": 0.0267,
"step": 10
},
{
"epoch": 0.13617021276595745,
"grad_norm": 0.10037467628717422,
"learning_rate": 1.8181818181818182e-05,
"loss": 0.0184,
"step": 20
},
{
"epoch": 0.20425531914893616,
"grad_norm": 0.11075304448604584,
"learning_rate": 1.9981755542233175e-05,
"loss": 0.0166,
"step": 30
},
{
"epoch": 0.2723404255319149,
"grad_norm": 0.056458745151758194,
"learning_rate": 1.99077515134553e-05,
"loss": 0.0109,
"step": 40
},
{
"epoch": 0.33361702127659576,
"eval_loss": 0.011730118654668331,
"eval_runtime": 15.7106,
"eval_samples_per_second": 3.374,
"eval_steps_per_second": 3.374,
"step": 49
},
{
"epoch": 0.3404255319148936,
"grad_norm": 0.09626475721597672,
"learning_rate": 1.977726916370847e-05,
"loss": 0.0087,
"step": 50
},
{
"epoch": 0.4085106382978723,
"grad_norm": 0.06276140362024307,
"learning_rate": 1.959105229687389e-05,
"loss": 0.0084,
"step": 60
},
{
"epoch": 0.4765957446808511,
"grad_norm": 0.08935730159282684,
"learning_rate": 1.9350162426854152e-05,
"loss": 0.0053,
"step": 70
},
{
"epoch": 0.5446808510638298,
"grad_norm": 0.08406774699687958,
"learning_rate": 1.9055972726500696e-05,
"loss": 0.0079,
"step": 80
},
{
"epoch": 0.6127659574468085,
"grad_norm": 0.06417492032051086,
"learning_rate": 1.8710160199955158e-05,
"loss": 0.0097,
"step": 90
},
{
"epoch": 0.6672340425531915,
"eval_loss": 0.005933254957199097,
"eval_runtime": 15.6927,
"eval_samples_per_second": 3.377,
"eval_steps_per_second": 3.377,
"step": 98
},
{
"epoch": 0.6808510638297872,
"grad_norm": 0.08632910996675491,
"learning_rate": 1.8314696123025456e-05,
"loss": 0.0039,
"step": 100
},
{
"epoch": 0.7489361702127659,
"grad_norm": 0.05762840434908867,
"learning_rate": 1.7871834806090502e-05,
"loss": 0.0072,
"step": 110
},
{
"epoch": 0.8170212765957446,
"grad_norm": 0.03136470168828964,
"learning_rate": 1.7384100743589698e-05,
"loss": 0.0048,
"step": 120
},
{
"epoch": 0.8851063829787233,
"grad_norm": 0.0481775663793087,
"learning_rate": 1.68542742233504e-05,
"loss": 0.0026,
"step": 130
},
{
"epoch": 0.9531914893617022,
"grad_norm": 0.048834796994924545,
"learning_rate": 1.6285375477786322e-05,
"loss": 0.0025,
"step": 140
},
{
"epoch": 1.0008510638297872,
"eval_loss": 0.0041367849335074425,
"eval_runtime": 15.6638,
"eval_samples_per_second": 3.384,
"eval_steps_per_second": 3.384,
"step": 147
},
{
"epoch": 1.0212765957446808,
"grad_norm": 0.05243828892707825,
"learning_rate": 1.568064746731156e-05,
"loss": 0.0027,
"step": 150
},
{
"epoch": 1.0893617021276596,
"grad_norm": 0.03650873154401779,
"learning_rate": 1.5043537394112008e-05,
"loss": 0.0017,
"step": 160
},
{
"epoch": 1.1574468085106382,
"grad_norm": 0.051568031311035156,
"learning_rate": 1.4377677051653404e-05,
"loss": 0.0027,
"step": 170
},
{
"epoch": 1.225531914893617,
"grad_norm": 0.03690316155552864,
"learning_rate": 1.368686212194199e-05,
"loss": 0.0013,
"step": 180
},
{
"epoch": 1.2936170212765958,
"grad_norm": 0.09455057233572006,
"learning_rate": 1.297503053855203e-05,
"loss": 0.0028,
"step": 190
},
{
"epoch": 1.334468085106383,
"eval_loss": 0.004107403103262186,
"eval_runtime": 15.6495,
"eval_samples_per_second": 3.387,
"eval_steps_per_second": 3.387,
"step": 196
},
{
"epoch": 1.3617021276595744,
"grad_norm": 0.09587451815605164,
"learning_rate": 1.2246240038760042e-05,
"loss": 0.0016,
"step": 200
},
{
"epoch": 1.4297872340425533,
"grad_norm": 0.03558555245399475,
"learning_rate": 1.1504645032747832e-05,
"loss": 0.0011,
"step": 210
},
{
"epoch": 1.4978723404255319,
"grad_norm": 0.059051159769296646,
"learning_rate": 1.0754472921729661e-05,
"loss": 0.0025,
"step": 220
},
{
"epoch": 1.5659574468085107,
"grad_norm": 0.03486516326665878,
"learning_rate": 1e-05,
"loss": 0.0062,
"step": 230
},
{
"epoch": 1.6340425531914895,
"grad_norm": 0.0532718189060688,
"learning_rate": 9.24552707827034e-06,
"loss": 0.0045,
"step": 240
},
{
"epoch": 1.6680851063829787,
"eval_loss": 0.0032062295358628035,
"eval_runtime": 15.7019,
"eval_samples_per_second": 3.375,
"eval_steps_per_second": 3.375,
"step": 245
},
{
"epoch": 1.702127659574468,
"grad_norm": 0.08899789303541183,
"learning_rate": 8.49535496725217e-06,
"loss": 0.0039,
"step": 250
},
{
"epoch": 1.7702127659574467,
"grad_norm": 0.02918567880988121,
"learning_rate": 7.753759961239965e-06,
"loss": 0.0012,
"step": 260
},
{
"epoch": 1.8382978723404255,
"grad_norm": 0.023614011704921722,
"learning_rate": 7.024969461447973e-06,
"loss": 0.0012,
"step": 270
},
{
"epoch": 1.9063829787234043,
"grad_norm": 0.05735747143626213,
"learning_rate": 6.3131378780580134e-06,
"loss": 0.0021,
"step": 280
},
{
"epoch": 1.974468085106383,
"grad_norm": 0.08490067720413208,
"learning_rate": 5.622322948346595e-06,
"loss": 0.0064,
"step": 290
},
{
"epoch": 2.0017021276595743,
"eval_loss": 0.0028720616828650236,
"eval_runtime": 15.7327,
"eval_samples_per_second": 3.369,
"eval_steps_per_second": 3.369,
"step": 294
},
{
"epoch": 2.0425531914893615,
"grad_norm": 0.031081199645996094,
"learning_rate": 4.956462605887994e-06,
"loss": 0.0014,
"step": 300
},
{
"epoch": 2.1106382978723404,
"grad_norm": 0.02734055370092392,
"learning_rate": 4.319352532688444e-06,
"loss": 0.0006,
"step": 310
},
{
"epoch": 2.178723404255319,
"grad_norm": 0.0303335003554821,
"learning_rate": 3.714624522213681e-06,
"loss": 0.0012,
"step": 320
},
{
"epoch": 2.246808510638298,
"grad_norm": 0.04239821061491966,
"learning_rate": 3.145725776649602e-06,
"loss": 0.0014,
"step": 330
},
{
"epoch": 2.3148936170212764,
"grad_norm": 0.0520123615860939,
"learning_rate": 2.615899256410306e-06,
"loss": 0.0019,
"step": 340
},
{
"epoch": 2.33531914893617,
"eval_loss": 0.002846540417522192,
"eval_runtime": 15.6173,
"eval_samples_per_second": 3.394,
"eval_steps_per_second": 3.394,
"step": 343
},
{
"epoch": 2.382978723404255,
"grad_norm": 0.10489223152399063,
"learning_rate": 2.1281651939094996e-06,
"loss": 0.005,
"step": 350
},
{
"epoch": 2.451063829787234,
"grad_norm": 0.06684073060750961,
"learning_rate": 1.6853038769745466e-06,
"loss": 0.0013,
"step": 360
},
{
"epoch": 2.519148936170213,
"grad_norm": 0.04539426416158676,
"learning_rate": 1.2898398000448441e-06,
"loss": 0.0013,
"step": 370
},
{
"epoch": 2.5872340425531917,
"grad_norm": 0.014835229143500328,
"learning_rate": 9.440272734993072e-07,
"loss": 0.0009,
"step": 380
},
{
"epoch": 2.65531914893617,
"grad_norm": 0.04478687047958374,
"learning_rate": 6.498375731458529e-07,
"loss": 0.0007,
"step": 390
},
{
"epoch": 2.668936170212766,
"eval_loss": 0.002925063017755747,
"eval_runtime": 15.5681,
"eval_samples_per_second": 3.404,
"eval_steps_per_second": 3.404,
"step": 392
},
{
"epoch": 2.723404255319149,
"grad_norm": 0.01802811771631241,
"learning_rate": 4.089477031261113e-07,
"loss": 0.0054,
"step": 400
},
{
"epoch": 2.7914893617021277,
"grad_norm": 0.037977054715156555,
"learning_rate": 2.2273083629153148e-07,
"loss": 0.0007,
"step": 410
},
{
"epoch": 2.8595744680851065,
"grad_norm": 0.02204310894012451,
"learning_rate": 9.224848654469932e-08,
"loss": 0.0035,
"step": 420
},
{
"epoch": 2.927659574468085,
"grad_norm": 0.02121455781161785,
"learning_rate": 1.824445776682504e-08,
"loss": 0.0034,
"step": 430
},
{
"epoch": 2.982127659574468,
"step": 438,
"total_flos": 2.997240173615186e+18,
"train_loss": 0.004681217891363997,
"train_runtime": 15083.6638,
"train_samples_per_second": 0.935,
"train_steps_per_second": 0.029
}
],
"logging_steps": 10,
"max_steps": 438,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 49,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 2,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 1
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2.997240173615186e+18,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}