logs / trainer_state.json
zireael08's picture
zireael08/swin-msldv2
0aba07d verified
{
"best_global_step": 747,
"best_metric": 0.9911504424778761,
"best_model_checkpoint": "./logs/checkpoint-747",
"epoch": 10.0,
"eval_steps": 500,
"global_step": 830,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.12048192771084337,
"grad_norm": 6.236629486083984,
"learning_rate": 1.0843373493975904e-05,
"loss": 1.7779657363891601,
"step": 10
},
{
"epoch": 0.24096385542168675,
"grad_norm": 6.614838123321533,
"learning_rate": 2.289156626506024e-05,
"loss": 1.6157239913940429,
"step": 20
},
{
"epoch": 0.3614457831325301,
"grad_norm": 8.87565803527832,
"learning_rate": 3.4939759036144585e-05,
"loss": 1.3881938934326172,
"step": 30
},
{
"epoch": 0.4819277108433735,
"grad_norm": 8.774048805236816,
"learning_rate": 4.698795180722892e-05,
"loss": 1.0548779487609863,
"step": 40
},
{
"epoch": 0.6024096385542169,
"grad_norm": 10.919977188110352,
"learning_rate": 5.903614457831326e-05,
"loss": 0.860891056060791,
"step": 50
},
{
"epoch": 0.7228915662650602,
"grad_norm": 17.067983627319336,
"learning_rate": 7.108433734939759e-05,
"loss": 0.7981919765472412,
"step": 60
},
{
"epoch": 0.8433734939759037,
"grad_norm": 9.766481399536133,
"learning_rate": 8.313253012048194e-05,
"loss": 0.7360480785369873,
"step": 70
},
{
"epoch": 0.963855421686747,
"grad_norm": 11.55764102935791,
"learning_rate": 9.518072289156626e-05,
"loss": 0.636728572845459,
"step": 80
},
{
"epoch": 1.0,
"eval_accuracy": 0.8247787610619469,
"eval_loss": 0.46115025877952576,
"eval_runtime": 5.444,
"eval_samples_per_second": 103.785,
"eval_steps_per_second": 1.653,
"step": 83
},
{
"epoch": 1.0843373493975903,
"grad_norm": 13.059717178344727,
"learning_rate": 9.998408238461338e-05,
"loss": 0.5606242656707764,
"step": 90
},
{
"epoch": 1.2048192771084336,
"grad_norm": 7.8438615798950195,
"learning_rate": 9.988684476816419e-05,
"loss": 0.4747779369354248,
"step": 100
},
{
"epoch": 1.3253012048192772,
"grad_norm": 17.873554229736328,
"learning_rate": 9.970138440692705e-05,
"loss": 0.513523006439209,
"step": 110
},
{
"epoch": 1.4457831325301205,
"grad_norm": 11.972646713256836,
"learning_rate": 9.942802927959443e-05,
"loss": 0.5974394321441651,
"step": 120
},
{
"epoch": 1.5662650602409638,
"grad_norm": 16.943544387817383,
"learning_rate": 9.906726280298186e-05,
"loss": 0.452280855178833,
"step": 130
},
{
"epoch": 1.6867469879518073,
"grad_norm": 9.1749906539917,
"learning_rate": 9.861972297712605e-05,
"loss": 0.4720293045043945,
"step": 140
},
{
"epoch": 1.8072289156626506,
"grad_norm": 11.010215759277344,
"learning_rate": 9.808620125700925e-05,
"loss": 0.4886914253234863,
"step": 150
},
{
"epoch": 1.927710843373494,
"grad_norm": 8.292040824890137,
"learning_rate": 9.746764115290496e-05,
"loss": 0.46558895111083987,
"step": 160
},
{
"epoch": 2.0,
"eval_accuracy": 0.8495575221238938,
"eval_loss": 0.3608015775680542,
"eval_runtime": 5.678,
"eval_samples_per_second": 99.507,
"eval_steps_per_second": 1.585,
"step": 166
},
{
"epoch": 2.0481927710843375,
"grad_norm": 17.205005645751953,
"learning_rate": 9.676513656182058e-05,
"loss": 0.36324758529663087,
"step": 170
},
{
"epoch": 2.1686746987951806,
"grad_norm": 11.862001419067383,
"learning_rate": 9.597992983298747e-05,
"loss": 0.4520224094390869,
"step": 180
},
{
"epoch": 2.289156626506024,
"grad_norm": 15.979503631591797,
"learning_rate": 9.511340957081958e-05,
"loss": 0.40279397964477537,
"step": 190
},
{
"epoch": 2.4096385542168672,
"grad_norm": 14.119962692260742,
"learning_rate": 9.416710817922615e-05,
"loss": 0.41336545944213865,
"step": 200
},
{
"epoch": 2.5301204819277108,
"grad_norm": 17.406816482543945,
"learning_rate": 9.314269915162114e-05,
"loss": 0.3019423961639404,
"step": 210
},
{
"epoch": 2.6506024096385543,
"grad_norm": 14.716191291809082,
"learning_rate": 9.204199411142196e-05,
"loss": 0.3748778820037842,
"step": 220
},
{
"epoch": 2.7710843373493974,
"grad_norm": 21.437185287475586,
"learning_rate": 9.086693960827105e-05,
"loss": 0.34201803207397463,
"step": 230
},
{
"epoch": 2.891566265060241,
"grad_norm": 10.061134338378906,
"learning_rate": 8.961961367564651e-05,
"loss": 0.49113874435424804,
"step": 240
},
{
"epoch": 3.0,
"eval_accuracy": 0.9646017699115044,
"eval_loss": 0.1344006061553955,
"eval_runtime": 6.1864,
"eval_samples_per_second": 91.329,
"eval_steps_per_second": 1.455,
"step": 249
},
{
"epoch": 3.0120481927710845,
"grad_norm": 12.825634956359863,
"learning_rate": 8.83022221559489e-05,
"loss": 0.27299160957336427,
"step": 250
},
{
"epoch": 3.1325301204819276,
"grad_norm": 10.527647972106934,
"learning_rate": 8.691709479956373e-05,
"loss": 0.24414093494415284,
"step": 260
},
{
"epoch": 3.253012048192771,
"grad_norm": 4.087771892547607,
"learning_rate": 8.546668114479768e-05,
"loss": 0.18104053735733033,
"step": 270
},
{
"epoch": 3.3734939759036147,
"grad_norm": 9.269667625427246,
"learning_rate": 8.395354618597533e-05,
"loss": 0.3339837551116943,
"step": 280
},
{
"epoch": 3.4939759036144578,
"grad_norm": 5.284663200378418,
"learning_rate": 8.238036583735673e-05,
"loss": 0.2400984764099121,
"step": 290
},
{
"epoch": 3.6144578313253013,
"grad_norm": 8.479917526245117,
"learning_rate": 8.074992220089769e-05,
"loss": 0.22019331455230712,
"step": 300
},
{
"epoch": 3.734939759036145,
"grad_norm": 9.128419876098633,
"learning_rate": 7.906509864622203e-05,
"loss": 0.25870823860168457,
"step": 310
},
{
"epoch": 3.855421686746988,
"grad_norm": 12.799089431762695,
"learning_rate": 7.73288747115059e-05,
"loss": 0.20678648948669434,
"step": 320
},
{
"epoch": 3.9759036144578315,
"grad_norm": 11.251450538635254,
"learning_rate": 7.554432083429253e-05,
"loss": 0.1629856824874878,
"step": 330
},
{
"epoch": 4.0,
"eval_accuracy": 0.9575221238938053,
"eval_loss": 0.13474561274051666,
"eval_runtime": 5.6362,
"eval_samples_per_second": 100.245,
"eval_steps_per_second": 1.597,
"step": 332
},
{
"epoch": 4.096385542168675,
"grad_norm": 19.876901626586914,
"learning_rate": 7.3714592921555e-05,
"loss": 0.24734578132629395,
"step": 340
},
{
"epoch": 4.216867469879518,
"grad_norm": 13.15965461730957,
"learning_rate": 7.184292676861024e-05,
"loss": 0.22765071392059327,
"step": 350
},
{
"epoch": 4.337349397590361,
"grad_norm": 12.12977123260498,
"learning_rate": 6.99326323367538e-05,
"loss": 0.17916421890258788,
"step": 360
},
{
"epoch": 4.457831325301205,
"grad_norm": 8.979646682739258,
"learning_rate": 6.798708789973527e-05,
"loss": 0.1901506304740906,
"step": 370
},
{
"epoch": 4.578313253012048,
"grad_norm": 5.592668056488037,
"learning_rate": 6.600973406942616e-05,
"loss": 0.22261853218078614,
"step": 380
},
{
"epoch": 4.698795180722891,
"grad_norm": 12.222548484802246,
"learning_rate": 6.400406771124536e-05,
"loss": 0.16046804189682007,
"step": 390
},
{
"epoch": 4.8192771084337345,
"grad_norm": 9.516422271728516,
"learning_rate": 6.197363576010264e-05,
"loss": 0.3090466022491455,
"step": 400
},
{
"epoch": 4.9397590361445785,
"grad_norm": 8.311286926269531,
"learning_rate": 5.992202894779649e-05,
"loss": 0.18722275495529175,
"step": 410
},
{
"epoch": 5.0,
"eval_accuracy": 0.9628318584070796,
"eval_loss": 0.11059214919805527,
"eval_runtime": 5.668,
"eval_samples_per_second": 99.682,
"eval_steps_per_second": 1.588,
"step": 415
},
{
"epoch": 5.0602409638554215,
"grad_norm": 6.544140338897705,
"learning_rate": 5.7852875452958954e-05,
"loss": 0.1725080966949463,
"step": 420
},
{
"epoch": 5.180722891566265,
"grad_norm": 7.241940975189209,
"learning_rate": 5.576983448477734e-05,
"loss": 0.2657145023345947,
"step": 430
},
{
"epoch": 5.301204819277109,
"grad_norm": 2.805722713470459,
"learning_rate": 5.3676589811839796e-05,
"loss": 0.16265145540237427,
"step": 440
},
{
"epoch": 5.421686746987952,
"grad_norm": 7.153483867645264,
"learning_rate": 5.157684324754858e-05,
"loss": 0.1511433720588684,
"step": 450
},
{
"epoch": 5.542168674698795,
"grad_norm": 3.1414175033569336,
"learning_rate": 4.9474308103621874e-05,
"loss": 0.15450478792190553,
"step": 460
},
{
"epoch": 5.662650602409639,
"grad_norm": 3.8960776329040527,
"learning_rate": 4.737270262326134e-05,
"loss": 0.13111191987991333,
"step": 470
},
{
"epoch": 5.783132530120482,
"grad_norm": 7.418442726135254,
"learning_rate": 4.527574340559844e-05,
"loss": 0.1539200186729431,
"step": 480
},
{
"epoch": 5.903614457831325,
"grad_norm": 8.860248565673828,
"learning_rate": 4.3187138833048456e-05,
"loss": 0.1801429271697998,
"step": 490
},
{
"epoch": 6.0,
"eval_accuracy": 0.9823008849557522,
"eval_loss": 0.09679495543241501,
"eval_runtime": 6.1076,
"eval_samples_per_second": 92.508,
"eval_steps_per_second": 1.474,
"step": 498
},
{
"epoch": 6.024096385542169,
"grad_norm": 11.467449188232422,
"learning_rate": 4.111058251319516e-05,
"loss": 0.11156998872756958,
"step": 500
},
{
"epoch": 6.144578313253012,
"grad_norm": 13.2984037399292,
"learning_rate": 3.904974674680436e-05,
"loss": 0.11730811595916749,
"step": 510
},
{
"epoch": 6.265060240963855,
"grad_norm": 3.543509006500244,
"learning_rate": 3.7008276033517396e-05,
"loss": 0.19998840093612671,
"step": 520
},
{
"epoch": 6.385542168674699,
"grad_norm": 4.334460735321045,
"learning_rate": 3.49897806267101e-05,
"loss": 0.09577634930610657,
"step": 530
},
{
"epoch": 6.506024096385542,
"grad_norm": 1.5698552131652832,
"learning_rate": 3.2997830148914314e-05,
"loss": 0.11064940690994263,
"step": 540
},
{
"epoch": 6.626506024096385,
"grad_norm": 1.4867029190063477,
"learning_rate": 3.103594727909385e-05,
"loss": 0.0978583574295044,
"step": 550
},
{
"epoch": 6.746987951807229,
"grad_norm": 8.762438774108887,
"learning_rate": 2.910760152293764e-05,
"loss": 0.08853105902671814,
"step": 560
},
{
"epoch": 6.867469879518072,
"grad_norm": 5.0525360107421875,
"learning_rate": 2.721620307718793e-05,
"loss": 0.13467444181442262,
"step": 570
},
{
"epoch": 6.9879518072289155,
"grad_norm": 13.927603721618652,
"learning_rate": 2.536509679885355e-05,
"loss": 0.14531443119049073,
"step": 580
},
{
"epoch": 7.0,
"eval_accuracy": 0.9716814159292035,
"eval_loss": 0.1196078509092331,
"eval_runtime": 5.7351,
"eval_samples_per_second": 98.517,
"eval_steps_per_second": 1.569,
"step": 581
},
{
"epoch": 7.108433734939759,
"grad_norm": 1.3813672065734863,
"learning_rate": 2.3557556289973838e-05,
"loss": 0.07141577005386353,
"step": 590
},
{
"epoch": 7.228915662650603,
"grad_norm": 11.30803108215332,
"learning_rate": 2.179677810839382e-05,
"loss": 0.08913902044296265,
"step": 600
},
{
"epoch": 7.349397590361446,
"grad_norm": 15.15943717956543,
"learning_rate": 2.0085876114788937e-05,
"loss": 0.1786208987236023,
"step": 610
},
{
"epoch": 7.469879518072289,
"grad_norm": 6.659374237060547,
"learning_rate": 1.8427875965935758e-05,
"loss": 0.05375434160232544,
"step": 620
},
{
"epoch": 7.590361445783133,
"grad_norm": 3.859622001647949,
"learning_rate": 1.682570976396811e-05,
"loss": 0.13732693195343018,
"step": 630
},
{
"epoch": 7.710843373493976,
"grad_norm": 4.593474388122559,
"learning_rate": 1.5282210871079926e-05,
"loss": 0.09100980162620545,
"step": 640
},
{
"epoch": 7.831325301204819,
"grad_norm": 13.193694114685059,
"learning_rate": 1.3800108898846021e-05,
"loss": 0.09656141400337219,
"step": 650
},
{
"epoch": 7.951807228915663,
"grad_norm": 13.730281829833984,
"learning_rate": 1.2382024881020937e-05,
"loss": 0.0786526083946228,
"step": 660
},
{
"epoch": 8.0,
"eval_accuracy": 0.9893805309734514,
"eval_loss": 0.08379530161619186,
"eval_runtime": 5.6656,
"eval_samples_per_second": 99.724,
"eval_steps_per_second": 1.589,
"step": 664
},
{
"epoch": 8.072289156626505,
"grad_norm": 1.4331773519515991,
"learning_rate": 1.1030466638353293e-05,
"loss": 0.0922305703163147,
"step": 670
},
{
"epoch": 8.19277108433735,
"grad_norm": 11.172012329101562,
"learning_rate": 9.747824343612338e-06,
"loss": 0.051563167572021486,
"step": 680
},
{
"epoch": 8.313253012048193,
"grad_norm": 9.389365196228027,
"learning_rate": 8.536366294669978e-06,
"loss": 0.0976746916770935,
"step": 690
},
{
"epoch": 8.433734939759036,
"grad_norm": 0.6641272902488708,
"learning_rate": 7.398234903113266e-06,
"loss": 0.07286246418952942,
"step": 700
},
{
"epoch": 8.55421686746988,
"grad_norm": 10.819772720336914,
"learning_rate": 6.335442905481442e-06,
"loss": 0.07259726524353027,
"step": 710
},
{
"epoch": 8.674698795180722,
"grad_norm": 0.8964389562606812,
"learning_rate": 5.349869803827717e-06,
"loss": 0.043448707461357115,
"step": 720
},
{
"epoch": 8.795180722891565,
"grad_norm": 7.601110935211182,
"learning_rate": 4.4432585419005076e-06,
"loss": 0.10902594327926636,
"step": 730
},
{
"epoch": 8.91566265060241,
"grad_norm": 0.9344149827957153,
"learning_rate": 3.6172124228221914e-06,
"loss": 0.03534201383590698,
"step": 740
},
{
"epoch": 9.0,
"eval_accuracy": 0.9911504424778761,
"eval_loss": 0.0801326259970665,
"eval_runtime": 6.1806,
"eval_samples_per_second": 91.415,
"eval_steps_per_second": 1.456,
"step": 747
},
{
"epoch": 9.036144578313253,
"grad_norm": 2.648090124130249,
"learning_rate": 2.8731922737163685e-06,
"loss": 0.024153660237789153,
"step": 750
},
{
"epoch": 9.156626506024097,
"grad_norm": 7.166019439697266,
"learning_rate": 2.212513862297649e-06,
"loss": 0.12096415758132935,
"step": 760
},
{
"epoch": 9.27710843373494,
"grad_norm": 9.207470893859863,
"learning_rate": 1.6363455699930419e-06,
"loss": 0.10932642221450806,
"step": 770
},
{
"epoch": 9.397590361445783,
"grad_norm": 2.5481743812561035,
"learning_rate": 1.145706325709389e-06,
"loss": 0.06989773511886596,
"step": 780
},
{
"epoch": 9.518072289156626,
"grad_norm": 12.317899703979492,
"learning_rate": 7.414638039014265e-07,
"loss": 0.08738085627555847,
"step": 790
},
{
"epoch": 9.638554216867469,
"grad_norm": 2.812633752822876,
"learning_rate": 4.2433289012662194e-07,
"loss": 0.04424922168254852,
"step": 800
},
{
"epoch": 9.759036144578314,
"grad_norm": 1.378448486328125,
"learning_rate": 1.9487441680084983e-07,
"loss": 0.07828723788261413,
"step": 810
},
{
"epoch": 9.879518072289157,
"grad_norm": 9.539847373962402,
"learning_rate": 5.3494171390228166e-08,
"loss": 0.06453937888145447,
"step": 820
},
{
"epoch": 10.0,
"grad_norm": 0.7179245948791504,
"learning_rate": 4.4217879344166103e-10,
"loss": 0.08780375719070435,
"step": 830
},
{
"epoch": 10.0,
"eval_accuracy": 0.9911504424778761,
"eval_loss": 0.08178059756755829,
"eval_runtime": 5.6512,
"eval_samples_per_second": 99.979,
"eval_steps_per_second": 1.593,
"step": 830
},
{
"epoch": 10.0,
"step": 830,
"total_flos": 8.593274471605862e+17,
"train_loss": 0.29410572172288435,
"train_runtime": 896.4382,
"train_samples_per_second": 29.461,
"train_steps_per_second": 0.926
}
],
"logging_steps": 10,
"max_steps": 830,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 8.593274471605862e+17,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}