mohammadmahdinouri commited on
Commit
118ead0
·
verified ·
1 Parent(s): efc19bc

Training in progress, step 12000, checkpoint

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0f8dddf6de75a2669e45bc92f6a4ca08a65509177a3732a367cbfa1c80daacbe
3
  size 244223098
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:53a76ac9077fe4281340d991c88d870e9612c562f18291ef2fff16717ae5fa8e
3
  size 244223098
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:30dee1c1faa1fa9cca0bbdc3497512922f4906f4ab49d60e46fb24c934bb150d
3
  size 381944306
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91d1f43ad07d5d2506105a4a30a104f55de4d303f90118c0693725602ca12996
3
  size 381944306
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cfe43fa1be8fc23eebf6d0265c9e86d27dbe1a7183ee9ff8d290496f67f7920b
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ad09e3903383f043ca34c37e900cc005c86f2fe664b64f1c40d71b64081bc5f
3
  size 14960
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:02838e3dd99a981aed96c1e46abb129b6636bb9bdc4bb3b9d32692ead8821881
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:394ea9776e43b9e34e0f31ed80a4339ee1363ca5d97e2a122bc469ea88e3a051
3
  size 14960
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:aed6a0b83373d2ca2f6ea1f1ac78752c4b8eb48d2f34a0bffe9748140ee5f947
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:185d9bf5dd99d2fb4439e3538848f269528414d649a4a521e708b152f704c300
3
  size 14960
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f0de41f811c47a09044e5ad93b32d48fbc2e808eb9859cb07a66f7923677574e
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f8af41d2465acba77e73ddefdb8cd3bcfad2e25c1dc9b4e154733802fd9eb85a
3
  size 14960
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f8fd8d6850b7427eafc7ded0e60d1d7d6419f9660dea8de7c7cbb8cd0dbd9818
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c91c39eca7bff6b168c2d221fc52c0c253070df5bf1f21ee503aeddc471ee587
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.016294461660613026,
6
  "eval_steps": 500,
7
- "global_step": 11000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -3858,6 +3858,356 @@
3858
  "learning_rate": 0.0004974073060677781,
3859
  "loss": 23.6657,
3860
  "step": 11000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3861
  }
3862
  ],
3863
  "logging_steps": 20,
@@ -3877,7 +4227,7 @@
3877
  "attributes": {}
3878
  }
3879
  },
3880
- "total_flos": 7.567919072411648e+18,
3881
  "train_batch_size": 48,
3882
  "trial_name": null,
3883
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.017775776357032393,
6
  "eval_steps": 500,
7
+ "global_step": 12000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
3858
  "learning_rate": 0.0004974073060677781,
3859
  "loss": 23.6657,
3860
  "step": 11000
3861
+ },
3862
+ {
3863
+ "epoch": 0.016324087954541416,
3864
+ "grad_norm": 18.875,
3865
+ "learning_rate": 0.0004974023671327705,
3866
+ "loss": 23.6059,
3867
+ "step": 11020
3868
+ },
3869
+ {
3870
+ "epoch": 0.016353714248469802,
3871
+ "grad_norm": 16.875,
3872
+ "learning_rate": 0.0004973974281977628,
3873
+ "loss": 23.6203,
3874
+ "step": 11040
3875
+ },
3876
+ {
3877
+ "epoch": 0.01638334054239819,
3878
+ "grad_norm": 26.0,
3879
+ "learning_rate": 0.0004973924892627553,
3880
+ "loss": 23.5207,
3881
+ "step": 11060
3882
+ },
3883
+ {
3884
+ "epoch": 0.016412966836326575,
3885
+ "grad_norm": 18.25,
3886
+ "learning_rate": 0.0004973875503277477,
3887
+ "loss": 23.711,
3888
+ "step": 11080
3889
+ },
3890
+ {
3891
+ "epoch": 0.016442593130254965,
3892
+ "grad_norm": 17.125,
3893
+ "learning_rate": 0.0004973826113927401,
3894
+ "loss": 23.5764,
3895
+ "step": 11100
3896
+ },
3897
+ {
3898
+ "epoch": 0.01647221942418335,
3899
+ "grad_norm": 18.125,
3900
+ "learning_rate": 0.0004973776724577325,
3901
+ "loss": 23.6693,
3902
+ "step": 11120
3903
+ },
3904
+ {
3905
+ "epoch": 0.016501845718111738,
3906
+ "grad_norm": 20.875,
3907
+ "learning_rate": 0.000497372733522725,
3908
+ "loss": 23.5375,
3909
+ "step": 11140
3910
+ },
3911
+ {
3912
+ "epoch": 0.016531472012040124,
3913
+ "grad_norm": 14.75,
3914
+ "learning_rate": 0.0004973677945877174,
3915
+ "loss": 23.5473,
3916
+ "step": 11160
3917
+ },
3918
+ {
3919
+ "epoch": 0.016561098305968514,
3920
+ "grad_norm": 15.625,
3921
+ "learning_rate": 0.0004973628556527099,
3922
+ "loss": 23.5889,
3923
+ "step": 11180
3924
+ },
3925
+ {
3926
+ "epoch": 0.0165907245998969,
3927
+ "grad_norm": 16.875,
3928
+ "learning_rate": 0.0004973579167177023,
3929
+ "loss": 23.5879,
3930
+ "step": 11200
3931
+ },
3932
+ {
3933
+ "epoch": 0.016620350893825287,
3934
+ "grad_norm": 18.375,
3935
+ "learning_rate": 0.0004973529777826948,
3936
+ "loss": 23.4974,
3937
+ "step": 11220
3938
+ },
3939
+ {
3940
+ "epoch": 0.016649977187753674,
3941
+ "grad_norm": 15.625,
3942
+ "learning_rate": 0.0004973480388476872,
3943
+ "loss": 23.4771,
3944
+ "step": 11240
3945
+ },
3946
+ {
3947
+ "epoch": 0.016679603481682063,
3948
+ "grad_norm": 17.5,
3949
+ "learning_rate": 0.0004973430999126796,
3950
+ "loss": 23.4806,
3951
+ "step": 11260
3952
+ },
3953
+ {
3954
+ "epoch": 0.01670922977561045,
3955
+ "grad_norm": 19.75,
3956
+ "learning_rate": 0.0004973381609776721,
3957
+ "loss": 23.651,
3958
+ "step": 11280
3959
+ },
3960
+ {
3961
+ "epoch": 0.016738856069538836,
3962
+ "grad_norm": 16.625,
3963
+ "learning_rate": 0.0004973332220426645,
3964
+ "loss": 23.5367,
3965
+ "step": 11300
3966
+ },
3967
+ {
3968
+ "epoch": 0.016768482363467223,
3969
+ "grad_norm": 19.875,
3970
+ "learning_rate": 0.0004973282831076569,
3971
+ "loss": 23.5171,
3972
+ "step": 11320
3973
+ },
3974
+ {
3975
+ "epoch": 0.016798108657395613,
3976
+ "grad_norm": 17.125,
3977
+ "learning_rate": 0.0004973233441726494,
3978
+ "loss": 23.4766,
3979
+ "step": 11340
3980
+ },
3981
+ {
3982
+ "epoch": 0.016827734951324,
3983
+ "grad_norm": 15.3125,
3984
+ "learning_rate": 0.0004973184052376418,
3985
+ "loss": 23.4622,
3986
+ "step": 11360
3987
+ },
3988
+ {
3989
+ "epoch": 0.016857361245252386,
3990
+ "grad_norm": 19.375,
3991
+ "learning_rate": 0.0004973134663026343,
3992
+ "loss": 23.5135,
3993
+ "step": 11380
3994
+ },
3995
+ {
3996
+ "epoch": 0.016886987539180772,
3997
+ "grad_norm": 17.625,
3998
+ "learning_rate": 0.0004973085273676267,
3999
+ "loss": 23.485,
4000
+ "step": 11400
4001
+ },
4002
+ {
4003
+ "epoch": 0.016916613833109162,
4004
+ "grad_norm": 17.5,
4005
+ "learning_rate": 0.0004973035884326191,
4006
+ "loss": 23.4218,
4007
+ "step": 11420
4008
+ },
4009
+ {
4010
+ "epoch": 0.01694624012703755,
4011
+ "grad_norm": 16.375,
4012
+ "learning_rate": 0.0004972986494976116,
4013
+ "loss": 23.4405,
4014
+ "step": 11440
4015
+ },
4016
+ {
4017
+ "epoch": 0.016975866420965935,
4018
+ "grad_norm": 21.125,
4019
+ "learning_rate": 0.000497293710562604,
4020
+ "loss": 23.4308,
4021
+ "step": 11460
4022
+ },
4023
+ {
4024
+ "epoch": 0.01700549271489432,
4025
+ "grad_norm": 18.875,
4026
+ "learning_rate": 0.0004972887716275964,
4027
+ "loss": 23.4076,
4028
+ "step": 11480
4029
+ },
4030
+ {
4031
+ "epoch": 0.01703511900882271,
4032
+ "grad_norm": 16.25,
4033
+ "learning_rate": 0.0004972838326925889,
4034
+ "loss": 23.4027,
4035
+ "step": 11500
4036
+ },
4037
+ {
4038
+ "epoch": 0.017064745302751098,
4039
+ "grad_norm": 15.9375,
4040
+ "learning_rate": 0.0004972788937575813,
4041
+ "loss": 23.3797,
4042
+ "step": 11520
4043
+ },
4044
+ {
4045
+ "epoch": 0.017094371596679484,
4046
+ "grad_norm": 17.875,
4047
+ "learning_rate": 0.0004972739548225737,
4048
+ "loss": 23.403,
4049
+ "step": 11540
4050
+ },
4051
+ {
4052
+ "epoch": 0.01712399789060787,
4053
+ "grad_norm": 20.25,
4054
+ "learning_rate": 0.0004972690158875662,
4055
+ "loss": 23.435,
4056
+ "step": 11560
4057
+ },
4058
+ {
4059
+ "epoch": 0.01715362418453626,
4060
+ "grad_norm": 17.875,
4061
+ "learning_rate": 0.0004972640769525586,
4062
+ "loss": 23.3429,
4063
+ "step": 11580
4064
+ },
4065
+ {
4066
+ "epoch": 0.017183250478464647,
4067
+ "grad_norm": 19.0,
4068
+ "learning_rate": 0.0004972591380175511,
4069
+ "loss": 23.3458,
4070
+ "step": 11600
4071
+ },
4072
+ {
4073
+ "epoch": 0.017212876772393033,
4074
+ "grad_norm": 14.3125,
4075
+ "learning_rate": 0.0004972541990825435,
4076
+ "loss": 23.3765,
4077
+ "step": 11620
4078
+ },
4079
+ {
4080
+ "epoch": 0.01724250306632142,
4081
+ "grad_norm": 17.375,
4082
+ "learning_rate": 0.0004972492601475358,
4083
+ "loss": 23.3812,
4084
+ "step": 11640
4085
+ },
4086
+ {
4087
+ "epoch": 0.01727212936024981,
4088
+ "grad_norm": 17.375,
4089
+ "learning_rate": 0.0004972443212125283,
4090
+ "loss": 23.3419,
4091
+ "step": 11660
4092
+ },
4093
+ {
4094
+ "epoch": 0.017301755654178196,
4095
+ "grad_norm": 15.9375,
4096
+ "learning_rate": 0.0004972393822775207,
4097
+ "loss": 23.1804,
4098
+ "step": 11680
4099
+ },
4100
+ {
4101
+ "epoch": 0.017331381948106583,
4102
+ "grad_norm": 15.125,
4103
+ "learning_rate": 0.0004972344433425131,
4104
+ "loss": 23.2947,
4105
+ "step": 11700
4106
+ },
4107
+ {
4108
+ "epoch": 0.01736100824203497,
4109
+ "grad_norm": 17.0,
4110
+ "learning_rate": 0.0004972295044075056,
4111
+ "loss": 23.402,
4112
+ "step": 11720
4113
+ },
4114
+ {
4115
+ "epoch": 0.01739063453596336,
4116
+ "grad_norm": 18.5,
4117
+ "learning_rate": 0.000497224565472498,
4118
+ "loss": 23.2933,
4119
+ "step": 11740
4120
+ },
4121
+ {
4122
+ "epoch": 0.017420260829891746,
4123
+ "grad_norm": 17.125,
4124
+ "learning_rate": 0.0004972196265374904,
4125
+ "loss": 23.296,
4126
+ "step": 11760
4127
+ },
4128
+ {
4129
+ "epoch": 0.017449887123820132,
4130
+ "grad_norm": 15.5625,
4131
+ "learning_rate": 0.0004972146876024829,
4132
+ "loss": 23.2247,
4133
+ "step": 11780
4134
+ },
4135
+ {
4136
+ "epoch": 0.017479513417748522,
4137
+ "grad_norm": 15.375,
4138
+ "learning_rate": 0.0004972097486674753,
4139
+ "loss": 23.1945,
4140
+ "step": 11800
4141
+ },
4142
+ {
4143
+ "epoch": 0.01750913971167691,
4144
+ "grad_norm": 17.25,
4145
+ "learning_rate": 0.0004972048097324678,
4146
+ "loss": 23.2879,
4147
+ "step": 11820
4148
+ },
4149
+ {
4150
+ "epoch": 0.017538766005605295,
4151
+ "grad_norm": 16.5,
4152
+ "learning_rate": 0.0004971998707974602,
4153
+ "loss": 23.2503,
4154
+ "step": 11840
4155
+ },
4156
+ {
4157
+ "epoch": 0.01756839229953368,
4158
+ "grad_norm": 16.875,
4159
+ "learning_rate": 0.0004971949318624526,
4160
+ "loss": 23.2298,
4161
+ "step": 11860
4162
+ },
4163
+ {
4164
+ "epoch": 0.01759801859346207,
4165
+ "grad_norm": 16.5,
4166
+ "learning_rate": 0.0004971899929274451,
4167
+ "loss": 23.2478,
4168
+ "step": 11880
4169
+ },
4170
+ {
4171
+ "epoch": 0.017627644887390458,
4172
+ "grad_norm": 16.875,
4173
+ "learning_rate": 0.0004971850539924375,
4174
+ "loss": 23.2439,
4175
+ "step": 11900
4176
+ },
4177
+ {
4178
+ "epoch": 0.017657271181318844,
4179
+ "grad_norm": 16.75,
4180
+ "learning_rate": 0.0004971801150574299,
4181
+ "loss": 23.2426,
4182
+ "step": 11920
4183
+ },
4184
+ {
4185
+ "epoch": 0.01768689747524723,
4186
+ "grad_norm": 15.5,
4187
+ "learning_rate": 0.0004971751761224224,
4188
+ "loss": 23.2587,
4189
+ "step": 11940
4190
+ },
4191
+ {
4192
+ "epoch": 0.01771652376917562,
4193
+ "grad_norm": 15.6875,
4194
+ "learning_rate": 0.0004971702371874148,
4195
+ "loss": 23.2458,
4196
+ "step": 11960
4197
+ },
4198
+ {
4199
+ "epoch": 0.017746150063104007,
4200
+ "grad_norm": 17.875,
4201
+ "learning_rate": 0.0004971652982524073,
4202
+ "loss": 23.1944,
4203
+ "step": 11980
4204
+ },
4205
+ {
4206
+ "epoch": 0.017775776357032393,
4207
+ "grad_norm": 15.5,
4208
+ "learning_rate": 0.0004971603593173997,
4209
+ "loss": 23.2169,
4210
+ "step": 12000
4211
  }
4212
  ],
4213
  "logging_steps": 20,
 
4227
  "attributes": {}
4228
  }
4229
  },
4230
+ "total_flos": 8.255909218322743e+18,
4231
  "train_batch_size": 48,
4232
  "trial_name": null,
4233
  "trial_params": null