Media1129 committed on
Commit
4a8d043
·
1 Parent(s): 1e284f1
Files changed (6) hide show
  1. optimizer.pt +1 -1
  2. pytorch_model.bin +1 -1
  3. rng_state.pth +1 -1
  4. scheduler.pt +1 -1
  5. trainer_state.json +39 -207
  6. training_args.bin +1 -1
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:579babb9ce23573912a301bc1751de67cf9825ec3ea9317a2e01ffe73fe79455
3
  size 871313389
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7fe83c26ae76d5d93e1957215fb0179310660f1ebaa579ad6c4fc38211140e10
3
  size 871313389
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:066ce09e712752766615c1b53e42e41f5d069d3b600761e41b63ce9cebbdf3f0
3
  size 435682807
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f18cb2c391a81bf1fefe1e3674b5d61d4cccf3981c080a91e1f96fd993adbe89
3
  size 435682807
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:22ef52bddeff60306a441bc1a3df088a0a8624db9d38fee3f8b4d84b0ab49949
3
  size 14657
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5fb839d52116fc81521e1cce677654e57def16b6ad2e9658db7953a663daf0fa
3
  size 14657
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:042af15c79784565ba53d294ccda64499ce1432cf6644e625e4cc137910bd1d8
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f86f1b35e573bdf2990c3537ab12b9cbd7afe67dc47eb0291e8daad0a519dc09
3
  size 623
trainer_state.json CHANGED
@@ -1,256 +1,88 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 7.22543352601156,
5
- "global_step": 20000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 0.18,
12
- "learning_rate": 4.909682080924856e-05,
13
- "loss": 0.0971,
14
  "step": 500
15
  },
16
  {
17
  "epoch": 0.36,
18
- "learning_rate": 4.8193641618497106e-05,
19
- "loss": 0.0082,
20
  "step": 1000
21
  },
22
  {
23
- "epoch": 0.54,
24
- "learning_rate": 4.729046242774567e-05,
25
- "loss": 0.0066,
26
  "step": 1500
27
  },
28
  {
29
- "epoch": 0.72,
30
- "learning_rate": 4.6387283236994224e-05,
31
- "loss": 0.0064,
32
  "step": 2000
33
  },
34
  {
35
- "epoch": 0.9,
36
- "learning_rate": 4.548410404624278e-05,
37
- "loss": 0.005,
38
  "step": 2500
39
  },
40
  {
41
- "epoch": 1.08,
42
- "learning_rate": 4.458092485549133e-05,
43
- "loss": 0.0034,
44
  "step": 3000
45
  },
46
  {
47
- "epoch": 1.26,
48
- "learning_rate": 4.367774566473989e-05,
49
- "loss": 0.0026,
50
  "step": 3500
51
  },
52
  {
53
- "epoch": 1.45,
54
- "learning_rate": 4.2774566473988445e-05,
55
- "loss": 0.0066,
56
  "step": 4000
57
  },
58
  {
59
- "epoch": 1.63,
60
- "learning_rate": 4.1871387283236994e-05,
61
- "loss": 0.0029,
62
  "step": 4500
63
  },
64
  {
65
- "epoch": 1.81,
66
- "learning_rate": 4.096820809248555e-05,
67
- "loss": 0.0021,
68
  "step": 5000
69
  },
70
  {
71
- "epoch": 1.99,
72
- "learning_rate": 4.006502890173411e-05,
73
- "loss": 0.0029,
74
  "step": 5500
75
  },
76
  {
77
- "epoch": 2.17,
78
- "learning_rate": 3.916184971098266e-05,
79
- "loss": 0.003,
80
  "step": 6000
81
- },
82
- {
83
- "epoch": 2.35,
84
- "learning_rate": 3.8258670520231215e-05,
85
- "loss": 0.0031,
86
- "step": 6500
87
- },
88
- {
89
- "epoch": 2.53,
90
- "learning_rate": 3.735549132947977e-05,
91
- "loss": 0.0016,
92
- "step": 7000
93
- },
94
- {
95
- "epoch": 2.71,
96
- "learning_rate": 3.6452312138728326e-05,
97
- "loss": 0.0016,
98
- "step": 7500
99
- },
100
- {
101
- "epoch": 2.89,
102
- "learning_rate": 3.554913294797688e-05,
103
- "loss": 0.0031,
104
- "step": 8000
105
- },
106
- {
107
- "epoch": 3.07,
108
- "learning_rate": 3.464595375722544e-05,
109
- "loss": 0.0035,
110
- "step": 8500
111
- },
112
- {
113
- "epoch": 3.25,
114
- "learning_rate": 3.374277456647399e-05,
115
- "loss": 0.0009,
116
- "step": 9000
117
- },
118
- {
119
- "epoch": 3.43,
120
- "learning_rate": 3.283959537572254e-05,
121
- "loss": 0.0025,
122
- "step": 9500
123
- },
124
- {
125
- "epoch": 3.61,
126
- "learning_rate": 3.19364161849711e-05,
127
- "loss": 0.0033,
128
- "step": 10000
129
- },
130
- {
131
- "epoch": 3.79,
132
- "learning_rate": 3.103323699421966e-05,
133
- "loss": 0.0009,
134
- "step": 10500
135
- },
136
- {
137
- "epoch": 3.97,
138
- "learning_rate": 3.013005780346821e-05,
139
- "loss": 0.0012,
140
- "step": 11000
141
- },
142
- {
143
- "epoch": 4.15,
144
- "learning_rate": 2.9226878612716762e-05,
145
- "loss": 0.0007,
146
- "step": 11500
147
- },
148
- {
149
- "epoch": 4.34,
150
- "learning_rate": 2.832369942196532e-05,
151
- "loss": 0.0019,
152
- "step": 12000
153
- },
154
- {
155
- "epoch": 4.52,
156
- "learning_rate": 2.7420520231213876e-05,
157
- "loss": 0.0009,
158
- "step": 12500
159
- },
160
- {
161
- "epoch": 4.7,
162
- "learning_rate": 2.651734104046243e-05,
163
- "loss": 0.0006,
164
- "step": 13000
165
- },
166
- {
167
- "epoch": 4.88,
168
- "learning_rate": 2.5614161849710984e-05,
169
- "loss": 0.0005,
170
- "step": 13500
171
- },
172
- {
173
- "epoch": 5.06,
174
- "learning_rate": 2.471098265895954e-05,
175
- "loss": 0.0021,
176
- "step": 14000
177
- },
178
- {
179
- "epoch": 5.24,
180
- "learning_rate": 2.380780346820809e-05,
181
- "loss": 0.0023,
182
- "step": 14500
183
- },
184
- {
185
- "epoch": 5.42,
186
- "learning_rate": 2.290462427745665e-05,
187
- "loss": 0.0004,
188
- "step": 15000
189
- },
190
- {
191
- "epoch": 5.6,
192
- "learning_rate": 2.2001445086705202e-05,
193
- "loss": 0.0008,
194
- "step": 15500
195
- },
196
- {
197
- "epoch": 5.78,
198
- "learning_rate": 2.1098265895953757e-05,
199
- "loss": 0.0001,
200
- "step": 16000
201
- },
202
- {
203
- "epoch": 5.96,
204
- "learning_rate": 2.0195086705202312e-05,
205
- "loss": 0.0011,
206
- "step": 16500
207
- },
208
- {
209
- "epoch": 6.14,
210
- "learning_rate": 1.9291907514450868e-05,
211
- "loss": 0.0015,
212
- "step": 17000
213
- },
214
- {
215
- "epoch": 6.32,
216
- "learning_rate": 1.8388728323699423e-05,
217
- "loss": 0.0004,
218
- "step": 17500
219
- },
220
- {
221
- "epoch": 6.5,
222
- "learning_rate": 1.748554913294798e-05,
223
- "loss": 0.0003,
224
- "step": 18000
225
- },
226
- {
227
- "epoch": 6.68,
228
- "learning_rate": 1.6582369942196534e-05,
229
- "loss": 0.0,
230
- "step": 18500
231
- },
232
- {
233
- "epoch": 6.86,
234
- "learning_rate": 1.567919075144509e-05,
235
- "loss": 0.0002,
236
- "step": 19000
237
- },
238
- {
239
- "epoch": 7.04,
240
- "learning_rate": 1.4776011560693643e-05,
241
- "loss": 0.0005,
242
- "step": 19500
243
- },
244
- {
245
- "epoch": 7.23,
246
- "learning_rate": 1.3872832369942197e-05,
247
- "loss": 0.0001,
248
- "step": 20000
249
  }
250
  ],
251
- "max_steps": 27680,
252
- "num_train_epochs": 10,
253
- "total_flos": 3107617302960360.0,
254
  "trial_name": null,
255
  "trial_params": null
256
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 2.1881838074398248,
5
+ "global_step": 6000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 0.18,
12
+ "learning_rate": 4.817651349380015e-05,
13
+ "loss": 0.1396,
14
  "step": 500
15
  },
16
  {
17
  "epoch": 0.36,
18
+ "learning_rate": 4.6353026987600294e-05,
19
+ "loss": 0.0182,
20
  "step": 1000
21
  },
22
  {
23
+ "epoch": 0.55,
24
+ "learning_rate": 4.452954048140044e-05,
25
+ "loss": 0.0148,
26
  "step": 1500
27
  },
28
  {
29
+ "epoch": 0.73,
30
+ "learning_rate": 4.2706053975200585e-05,
31
+ "loss": 0.009,
32
  "step": 2000
33
  },
34
  {
35
+ "epoch": 0.91,
36
+ "learning_rate": 4.088256746900073e-05,
37
+ "loss": 0.0083,
38
  "step": 2500
39
  },
40
  {
41
+ "epoch": 1.09,
42
+ "learning_rate": 3.9059080962800876e-05,
43
+ "loss": 0.0072,
44
  "step": 3000
45
  },
46
  {
47
+ "epoch": 1.28,
48
+ "learning_rate": 3.723559445660103e-05,
49
+ "loss": 0.004,
50
  "step": 3500
51
  },
52
  {
53
+ "epoch": 1.46,
54
+ "learning_rate": 3.541210795040117e-05,
55
+ "loss": 0.0052,
56
  "step": 4000
57
  },
58
  {
59
+ "epoch": 1.64,
60
+ "learning_rate": 3.358862144420131e-05,
61
+ "loss": 0.0032,
62
  "step": 4500
63
  },
64
  {
65
+ "epoch": 1.82,
66
+ "learning_rate": 3.1765134938001465e-05,
67
+ "loss": 0.0029,
68
  "step": 5000
69
  },
70
  {
71
+ "epoch": 2.01,
72
+ "learning_rate": 2.9941648431801604e-05,
73
+ "loss": 0.0055,
74
  "step": 5500
75
  },
76
  {
77
+ "epoch": 2.19,
78
+ "learning_rate": 2.811816192560175e-05,
79
+ "loss": 0.0027,
80
  "step": 6000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81
  }
82
  ],
83
+ "max_steps": 13710,
84
+ "num_train_epochs": 5,
85
+ "total_flos": 913869748994976.0,
86
  "trial_name": null,
87
  "trial_params": null
88
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c1bb2bacb28aff766b2989139df3648cad65ed44046a0a033488723ceb5108ad
3
  size 2607
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b91b3084725beeceaf4b2ad6b0a9decb7422bdd9a1ba4e3eecc94316284beb94
3
  size 2607