{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 16.0,
"global_step": 16800,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.12,
"learning_rate": 5e-05,
"loss": 1.7753,
"step": 128
},
{
"epoch": 0.24,
"learning_rate": 5e-05,
"loss": 1.4026,
"step": 256
},
{
"epoch": 0.37,
"learning_rate": 5e-05,
"loss": 1.3331,
"step": 384
},
{
"epoch": 0.49,
"learning_rate": 5e-05,
"loss": 1.3015,
"step": 512
},
{
"epoch": 0.61,
"learning_rate": 5e-05,
"loss": 1.2594,
"step": 640
},
{
"epoch": 0.73,
"learning_rate": 5e-05,
"loss": 1.2452,
"step": 768
},
{
"epoch": 0.85,
"learning_rate": 5e-05,
"loss": 1.2219,
"step": 896
},
{
"epoch": 0.98,
"learning_rate": 5e-05,
"loss": 1.2057,
"step": 1024
},
{
"epoch": 1.1,
"learning_rate": 5e-05,
"loss": 1.086,
"step": 1152
},
{
"epoch": 1.22,
"learning_rate": 5e-05,
"loss": 1.046,
"step": 1280
},
{
"epoch": 1.34,
"learning_rate": 5e-05,
"loss": 1.0642,
"step": 1408
},
{
"epoch": 1.46,
"learning_rate": 5e-05,
"loss": 1.0602,
"step": 1536
},
{
"epoch": 1.58,
"learning_rate": 5e-05,
"loss": 1.0552,
"step": 1664
},
{
"epoch": 1.71,
"learning_rate": 5e-05,
"loss": 1.0565,
"step": 1792
},
{
"epoch": 1.83,
"learning_rate": 5e-05,
"loss": 1.0602,
"step": 1920
},
{
"epoch": 1.95,
"learning_rate": 5e-05,
"loss": 1.0534,
"step": 2048
},
{
"epoch": 2.07,
"learning_rate": 5e-05,
"loss": 0.9373,
"step": 2176
},
{
"epoch": 2.19,
"learning_rate": 5e-05,
"loss": 0.8638,
"step": 2304
},
{
"epoch": 2.32,
"learning_rate": 5e-05,
"loss": 0.8759,
"step": 2432
},
{
"epoch": 2.44,
"learning_rate": 5e-05,
"loss": 0.8775,
"step": 2560
},
{
"epoch": 2.56,
"learning_rate": 5e-05,
"loss": 0.8906,
"step": 2688
},
{
"epoch": 2.68,
"learning_rate": 5e-05,
"loss": 0.8947,
"step": 2816
},
{
"epoch": 2.8,
"learning_rate": 5e-05,
"loss": 0.8846,
"step": 2944
},
{
"epoch": 2.93,
"learning_rate": 5e-05,
"loss": 0.8891,
"step": 3072
},
{
"epoch": 3.05,
"learning_rate": 5e-05,
"loss": 0.8039,
"step": 3200
},
{
"epoch": 3.17,
"learning_rate": 5e-05,
"loss": 0.6736,
"step": 3328
},
{
"epoch": 3.29,
"learning_rate": 5e-05,
"loss": 0.6814,
"step": 3456
},
{
"epoch": 3.41,
"learning_rate": 5e-05,
"loss": 0.7016,
"step": 3584
},
{
"epoch": 3.54,
"learning_rate": 5e-05,
"loss": 0.7039,
"step": 3712
},
{
"epoch": 3.66,
"learning_rate": 5e-05,
"loss": 0.708,
"step": 3840
},
{
"epoch": 3.78,
"learning_rate": 5e-05,
"loss": 0.7175,
"step": 3968
},
{
"epoch": 3.9,
"learning_rate": 5e-05,
"loss": 0.7214,
"step": 4096
},
{
"epoch": 4.02,
"learning_rate": 5e-05,
"loss": 0.6764,
"step": 4224
},
{
"epoch": 4.14,
"learning_rate": 5e-05,
"loss": 0.4891,
"step": 4352
},
{
"epoch": 4.27,
"learning_rate": 5e-05,
"loss": 0.5006,
"step": 4480
},
{
"epoch": 4.39,
"learning_rate": 5e-05,
"loss": 0.5129,
"step": 4608
},
{
"epoch": 4.51,
"learning_rate": 5e-05,
"loss": 0.5221,
"step": 4736
},
{
"epoch": 4.63,
"learning_rate": 5e-05,
"loss": 0.5278,
"step": 4864
},
{
"epoch": 4.75,
"learning_rate": 5e-05,
"loss": 0.5413,
"step": 4992
},
{
"epoch": 4.88,
"learning_rate": 5e-05,
"loss": 0.5393,
"step": 5120
},
{
"epoch": 5.0,
"learning_rate": 5e-05,
"loss": 0.5398,
"step": 5248
},
{
"epoch": 5.12,
"learning_rate": 5e-05,
"loss": 0.3369,
"step": 5376
},
{
"epoch": 5.24,
"learning_rate": 5e-05,
"loss": 0.3417,
"step": 5504
},
{
"epoch": 5.36,
"learning_rate": 5e-05,
"loss": 0.3502,
"step": 5632
},
{
"epoch": 5.49,
"learning_rate": 5e-05,
"loss": 0.3593,
"step": 5760
},
{
"epoch": 5.61,
"learning_rate": 5e-05,
"loss": 0.3695,
"step": 5888
},
{
"epoch": 5.73,
"learning_rate": 5e-05,
"loss": 0.3764,
"step": 6016
},
{
"epoch": 5.85,
"learning_rate": 5e-05,
"loss": 0.3831,
"step": 6144
},
{
"epoch": 5.97,
"learning_rate": 5e-05,
"loss": 0.3891,
"step": 6272
},
{
"epoch": 6.1,
"learning_rate": 5e-05,
"loss": 0.2571,
"step": 6400
},
{
"epoch": 6.22,
"learning_rate": 5e-05,
"loss": 0.2232,
"step": 6528
},
{
"epoch": 6.34,
"learning_rate": 5e-05,
"loss": 0.2324,
"step": 6656
},
{
"epoch": 6.46,
"learning_rate": 5e-05,
"loss": 0.2421,
"step": 6784
},
{
"epoch": 6.58,
"learning_rate": 5e-05,
"loss": 0.2479,
"step": 6912
},
{
"epoch": 6.7,
"learning_rate": 5e-05,
"loss": 0.2584,
"step": 7040
},
{
"epoch": 6.83,
"learning_rate": 5e-05,
"loss": 0.2642,
"step": 7168
},
{
"epoch": 6.95,
"learning_rate": 5e-05,
"loss": 0.2706,
"step": 7296
},
{
"epoch": 7.07,
"learning_rate": 5e-05,
"loss": 0.203,
"step": 7424
},
{
"epoch": 7.19,
"learning_rate": 5e-05,
"loss": 0.1488,
"step": 7552
},
{
"epoch": 7.31,
"learning_rate": 5e-05,
"loss": 0.1654,
"step": 7680
},
{
"epoch": 7.44,
"learning_rate": 5e-05,
"loss": 0.1706,
"step": 7808
},
{
"epoch": 7.56,
"learning_rate": 5e-05,
"loss": 0.1799,
"step": 7936
},
{
"epoch": 7.68,
"learning_rate": 5e-05,
"loss": 0.1823,
"step": 8064
},
{
"epoch": 7.8,
"learning_rate": 5e-05,
"loss": 0.1867,
"step": 8192
},
{
"epoch": 7.92,
"learning_rate": 5e-05,
"loss": 0.1931,
"step": 8320
},
{
"epoch": 8.05,
"learning_rate": 5e-05,
"loss": 0.1635,
"step": 8448
},
{
"epoch": 8.17,
"learning_rate": 5e-05,
"loss": 0.1111,
"step": 8576
},
{
"epoch": 8.29,
"learning_rate": 5e-05,
"loss": 0.1158,
"step": 8704
},
{
"epoch": 8.41,
"learning_rate": 5e-05,
"loss": 0.1188,
"step": 8832
},
{
"epoch": 8.53,
"learning_rate": 5e-05,
"loss": 0.124,
"step": 8960
},
{
"epoch": 8.66,
"learning_rate": 5e-05,
"loss": 0.1295,
"step": 9088
},
{
"epoch": 8.78,
"learning_rate": 5e-05,
"loss": 0.1348,
"step": 9216
},
{
"epoch": 8.9,
"learning_rate": 5e-05,
"loss": 0.1386,
"step": 9344
},
{
"epoch": 9.02,
"learning_rate": 5e-05,
"loss": 0.1343,
"step": 9472
},
{
"epoch": 9.14,
"learning_rate": 5e-05,
"loss": 0.0847,
"step": 9600
},
{
"epoch": 9.26,
"learning_rate": 5e-05,
"loss": 0.0873,
"step": 9728
},
{
"epoch": 9.39,
"learning_rate": 5e-05,
"loss": 0.0915,
"step": 9856
},
{
"epoch": 9.51,
"learning_rate": 5e-05,
"loss": 0.0961,
"step": 9984
},
{
"epoch": 9.63,
"learning_rate": 5e-05,
"loss": 0.1003,
"step": 10112
},
{
"epoch": 9.75,
"learning_rate": 5e-05,
"loss": 0.1043,
"step": 10240
},
{
"epoch": 9.87,
"learning_rate": 5e-05,
"loss": 0.1094,
"step": 10368
},
{
"epoch": 10.0,
"learning_rate": 5e-05,
"loss": 0.1135,
"step": 10496
},
{
"epoch": 10.12,
"learning_rate": 5e-05,
"loss": 0.0723,
"step": 10624
},
{
"epoch": 10.24,
"learning_rate": 5e-05,
"loss": 0.0724,
"step": 10752
},
{
"epoch": 10.36,
"learning_rate": 5e-05,
"loss": 0.0759,
"step": 10880
},
{
"epoch": 10.48,
"learning_rate": 5e-05,
"loss": 0.0779,
"step": 11008
},
{
"epoch": 10.61,
"learning_rate": 5e-05,
"loss": 0.0817,
"step": 11136
},
{
"epoch": 10.73,
"learning_rate": 5e-05,
"loss": 0.0867,
"step": 11264
},
{
"epoch": 10.85,
"learning_rate": 5e-05,
"loss": 0.0908,
"step": 11392
},
{
"epoch": 10.97,
"learning_rate": 5e-05,
"loss": 0.0954,
"step": 11520
},
{
"epoch": 11.09,
"learning_rate": 5e-05,
"loss": 0.0691,
"step": 11648
},
{
"epoch": 11.22,
"learning_rate": 5e-05,
"loss": 0.0627,
"step": 11776
},
{
"epoch": 11.34,
"learning_rate": 5e-05,
"loss": 0.0649,
"step": 11904
},
{
"epoch": 11.46,
"learning_rate": 5e-05,
"loss": 0.0683,
"step": 12032
},
{
"epoch": 11.58,
"learning_rate": 5e-05,
"loss": 0.0721,
"step": 12160
},
{
"epoch": 11.7,
"learning_rate": 5e-05,
"loss": 0.0747,
"step": 12288
},
{
"epoch": 11.82,
"learning_rate": 5e-05,
"loss": 0.0768,
"step": 12416
},
{
"epoch": 11.95,
"learning_rate": 5e-05,
"loss": 0.0809,
"step": 12544
},
{
"epoch": 12.07,
"learning_rate": 5e-05,
"loss": 0.0681,
"step": 12672
},
{
"epoch": 12.19,
"learning_rate": 5e-05,
"loss": 0.0546,
"step": 12800
},
{
"epoch": 12.31,
"learning_rate": 5e-05,
"loss": 0.0561,
"step": 12928
},
{
"epoch": 12.43,
"learning_rate": 5e-05,
"loss": 0.059,
"step": 13056
},
{
"epoch": 12.56,
"learning_rate": 5e-05,
"loss": 0.0612,
"step": 13184
},
{
"epoch": 12.68,
"learning_rate": 5e-05,
"loss": 0.0644,
"step": 13312
},
{
"epoch": 12.8,
"learning_rate": 5e-05,
"loss": 0.0693,
"step": 13440
},
{
"epoch": 12.92,
"learning_rate": 5e-05,
"loss": 0.072,
"step": 13568
},
{
"epoch": 13.04,
"learning_rate": 5e-05,
"loss": 0.0663,
"step": 13696
},
{
"epoch": 13.17,
"learning_rate": 5e-05,
"loss": 0.0506,
"step": 13824
},
{
"epoch": 13.29,
"learning_rate": 5e-05,
"loss": 0.0512,
"step": 13952
},
{
"epoch": 13.41,
"learning_rate": 5e-05,
"loss": 0.0539,
"step": 14080
},
{
"epoch": 13.53,
"learning_rate": 5e-05,
"loss": 0.057,
"step": 14208
},
{
"epoch": 13.65,
"learning_rate": 5e-05,
"loss": 0.059,
"step": 14336
},
{
"epoch": 13.78,
"learning_rate": 5e-05,
"loss": 0.0614,
"step": 14464
},
{
"epoch": 13.9,
"learning_rate": 5e-05,
"loss": 0.0643,
"step": 14592
},
{
"epoch": 14.02,
"learning_rate": 5e-05,
"loss": 0.0645,
"step": 14720
},
{
"epoch": 14.14,
"learning_rate": 5e-05,
"loss": 0.0454,
"step": 14848
},
{
"epoch": 14.26,
"learning_rate": 5e-05,
"loss": 0.0465,
"step": 14976
},
{
"epoch": 14.38,
"learning_rate": 5e-05,
"loss": 0.0484,
"step": 15104
},
{
"epoch": 14.51,
"learning_rate": 5e-05,
"loss": 0.0508,
"step": 15232
},
{
"epoch": 14.63,
"learning_rate": 5e-05,
"loss": 0.053,
"step": 15360
},
{
"epoch": 14.75,
"learning_rate": 5e-05,
"loss": 0.0546,
"step": 15488
},
{
"epoch": 14.87,
"learning_rate": 5e-05,
"loss": 0.0566,
"step": 15616
},
{
"epoch": 14.99,
"learning_rate": 5e-05,
"loss": 0.0594,
"step": 15744
},
{
"epoch": 15.12,
"learning_rate": 5e-05,
"loss": 0.043,
"step": 15872
},
{
"epoch": 15.24,
"learning_rate": 5e-05,
"loss": 0.0433,
"step": 16000
},
{
"epoch": 15.36,
"learning_rate": 5e-05,
"loss": 0.0456,
"step": 16128
},
{
"epoch": 15.48,
"learning_rate": 5e-05,
"loss": 0.0474,
"step": 16256
},
{
"epoch": 15.6,
"learning_rate": 5e-05,
"loss": 0.049,
"step": 16384
},
{
"epoch": 15.73,
"learning_rate": 5e-05,
"loss": 0.0501,
"step": 16512
},
{
"epoch": 15.85,
"learning_rate": 5e-05,
"loss": 0.0522,
"step": 16640
},
{
"epoch": 15.97,
"learning_rate": 5e-05,
"loss": 0.0554,
"step": 16768
}
],
"max_steps": 21000,
"num_train_epochs": 20,
"total_flos": 3285446215892992.0,
"trial_name": null,
"trial_params": null
}