fpadovani commited on
Commit
acd5c7b
·
verified ·
1 Parent(s): af5c7a8

Training in progress, step 9000, checkpoint

Browse files
checkpoint-9000/trainer_state.json CHANGED
@@ -26,9 +26,9 @@
26
  {
27
  "epoch": 1.0,
28
  "eval_loss": 3.5286149978637695,
29
- "eval_runtime": 19.6679,
30
- "eval_samples_per_second": 1618.73,
31
- "eval_steps_per_second": 6.356,
32
  "step": 496
33
  },
34
  {
@@ -41,9 +41,9 @@
41
  {
42
  "epoch": 2.0,
43
  "eval_loss": 3.3015408515930176,
44
- "eval_runtime": 19.7709,
45
- "eval_samples_per_second": 1610.299,
46
- "eval_steps_per_second": 6.322,
47
  "step": 992
48
  },
49
  {
@@ -56,9 +56,9 @@
56
  {
57
  "epoch": 3.0,
58
  "eval_loss": 3.2177340984344482,
59
- "eval_runtime": 19.6424,
60
- "eval_samples_per_second": 1620.83,
61
- "eval_steps_per_second": 6.364,
62
  "step": 1488
63
  },
64
  {
@@ -71,9 +71,9 @@
71
  {
72
  "epoch": 4.0,
73
  "eval_loss": 3.172600269317627,
74
- "eval_runtime": 19.9514,
75
- "eval_samples_per_second": 1595.729,
76
- "eval_steps_per_second": 6.265,
77
  "step": 1984
78
  },
79
  {
@@ -86,9 +86,9 @@
86
  {
87
  "epoch": 5.0,
88
  "eval_loss": 3.143342971801758,
89
- "eval_runtime": 19.7887,
90
- "eval_samples_per_second": 1608.847,
91
- "eval_steps_per_second": 6.317,
92
  "step": 2480
93
  },
94
  {
@@ -101,9 +101,9 @@
101
  {
102
  "epoch": 6.0,
103
  "eval_loss": 3.122177839279175,
104
- "eval_runtime": 20.027,
105
- "eval_samples_per_second": 1589.702,
106
- "eval_steps_per_second": 6.242,
107
  "step": 2976
108
  },
109
  {
@@ -116,9 +116,9 @@
116
  {
117
  "epoch": 7.0,
118
  "eval_loss": 3.110217332839966,
119
- "eval_runtime": 21.4316,
120
- "eval_samples_per_second": 1485.514,
121
- "eval_steps_per_second": 5.832,
122
  "step": 3472
123
  },
124
  {
@@ -131,9 +131,9 @@
131
  {
132
  "epoch": 8.0,
133
  "eval_loss": 3.0961976051330566,
134
- "eval_runtime": 25.6412,
135
- "eval_samples_per_second": 1241.633,
136
- "eval_steps_per_second": 4.875,
137
  "step": 3968
138
  },
139
  {
@@ -146,9 +146,9 @@
146
  {
147
  "epoch": 9.0,
148
  "eval_loss": 3.093751907348633,
149
- "eval_runtime": 25.4277,
150
- "eval_samples_per_second": 1252.062,
151
- "eval_steps_per_second": 4.916,
152
  "step": 4464
153
  },
154
  {
@@ -161,9 +161,9 @@
161
  {
162
  "epoch": 10.0,
163
  "eval_loss": 3.0947883129119873,
164
- "eval_runtime": 27.0755,
165
- "eval_samples_per_second": 1175.86,
166
- "eval_steps_per_second": 4.617,
167
  "step": 4960
168
  },
169
  {
@@ -176,9 +176,9 @@
176
  {
177
  "epoch": 11.0,
178
  "eval_loss": 3.1004531383514404,
179
- "eval_runtime": 25.2164,
180
- "eval_samples_per_second": 1262.553,
181
- "eval_steps_per_second": 4.957,
182
  "step": 5456
183
  },
184
  {
@@ -191,9 +191,9 @@
191
  {
192
  "epoch": 12.0,
193
  "eval_loss": 3.103127956390381,
194
- "eval_runtime": 25.3186,
195
- "eval_samples_per_second": 1257.455,
196
- "eval_steps_per_second": 4.937,
197
  "step": 5952
198
  },
199
  {
@@ -206,9 +206,9 @@
206
  {
207
  "epoch": 13.0,
208
  "eval_loss": 3.1127891540527344,
209
- "eval_runtime": 25.1178,
210
- "eval_samples_per_second": 1267.507,
211
- "eval_steps_per_second": 4.977,
212
  "step": 6448
213
  },
214
  {
@@ -221,9 +221,9 @@
221
  {
222
  "epoch": 14.0,
223
  "eval_loss": 3.121568202972412,
224
- "eval_runtime": 25.7204,
225
- "eval_samples_per_second": 1237.813,
226
- "eval_steps_per_second": 4.86,
227
  "step": 6944
228
  },
229
  {
@@ -236,9 +236,9 @@
236
  {
237
  "epoch": 15.0,
238
  "eval_loss": 3.133429765701294,
239
- "eval_runtime": 20.0808,
240
- "eval_samples_per_second": 1585.443,
241
- "eval_steps_per_second": 6.225,
242
  "step": 7440
243
  },
244
  {
@@ -251,9 +251,9 @@
251
  {
252
  "epoch": 16.0,
253
  "eval_loss": 3.145784616470337,
254
- "eval_runtime": 19.9481,
255
- "eval_samples_per_second": 1595.99,
256
- "eval_steps_per_second": 6.266,
257
  "step": 7936
258
  },
259
  {
@@ -266,9 +266,9 @@
266
  {
267
  "epoch": 17.0,
268
  "eval_loss": 3.153964042663574,
269
- "eval_runtime": 19.733,
270
- "eval_samples_per_second": 1613.388,
271
- "eval_steps_per_second": 6.335,
272
  "step": 8432
273
  },
274
  {
@@ -281,9 +281,9 @@
281
  {
282
  "epoch": 18.0,
283
  "eval_loss": 3.1646533012390137,
284
- "eval_runtime": 19.726,
285
- "eval_samples_per_second": 1613.965,
286
- "eval_steps_per_second": 6.337,
287
  "step": 8928
288
  }
289
  ],
 
26
  {
27
  "epoch": 1.0,
28
  "eval_loss": 3.5286149978637695,
29
+ "eval_runtime": 19.6261,
30
+ "eval_samples_per_second": 1622.178,
31
+ "eval_steps_per_second": 6.369,
32
  "step": 496
33
  },
34
  {
 
41
  {
42
  "epoch": 2.0,
43
  "eval_loss": 3.3015408515930176,
44
+ "eval_runtime": 19.6779,
45
+ "eval_samples_per_second": 1617.907,
46
+ "eval_steps_per_second": 6.352,
47
  "step": 992
48
  },
49
  {
 
56
  {
57
  "epoch": 3.0,
58
  "eval_loss": 3.2177340984344482,
59
+ "eval_runtime": 19.6725,
60
+ "eval_samples_per_second": 1618.351,
61
+ "eval_steps_per_second": 6.354,
62
  "step": 1488
63
  },
64
  {
 
71
  {
72
  "epoch": 4.0,
73
  "eval_loss": 3.172600269317627,
74
+ "eval_runtime": 19.983,
75
+ "eval_samples_per_second": 1593.2,
76
+ "eval_steps_per_second": 6.255,
77
  "step": 1984
78
  },
79
  {
 
86
  {
87
  "epoch": 5.0,
88
  "eval_loss": 3.143342971801758,
89
+ "eval_runtime": 19.7509,
90
+ "eval_samples_per_second": 1611.929,
91
+ "eval_steps_per_second": 6.329,
92
  "step": 2480
93
  },
94
  {
 
101
  {
102
  "epoch": 6.0,
103
  "eval_loss": 3.122177839279175,
104
+ "eval_runtime": 19.7374,
105
+ "eval_samples_per_second": 1613.03,
106
+ "eval_steps_per_second": 6.333,
107
  "step": 2976
108
  },
109
  {
 
116
  {
117
  "epoch": 7.0,
118
  "eval_loss": 3.110217332839966,
119
+ "eval_runtime": 19.7314,
120
+ "eval_samples_per_second": 1613.519,
121
+ "eval_steps_per_second": 6.335,
122
  "step": 3472
123
  },
124
  {
 
131
  {
132
  "epoch": 8.0,
133
  "eval_loss": 3.0961976051330566,
134
+ "eval_runtime": 19.9255,
135
+ "eval_samples_per_second": 1597.8,
136
+ "eval_steps_per_second": 6.273,
137
  "step": 3968
138
  },
139
  {
 
146
  {
147
  "epoch": 9.0,
148
  "eval_loss": 3.093751907348633,
149
+ "eval_runtime": 19.7735,
150
+ "eval_samples_per_second": 1610.084,
151
+ "eval_steps_per_second": 6.322,
152
  "step": 4464
153
  },
154
  {
 
161
  {
162
  "epoch": 10.0,
163
  "eval_loss": 3.0947883129119873,
164
+ "eval_runtime": 19.7413,
165
+ "eval_samples_per_second": 1612.712,
166
+ "eval_steps_per_second": 6.332,
167
  "step": 4960
168
  },
169
  {
 
176
  {
177
  "epoch": 11.0,
178
  "eval_loss": 3.1004531383514404,
179
+ "eval_runtime": 19.7085,
180
+ "eval_samples_per_second": 1615.393,
181
+ "eval_steps_per_second": 6.342,
182
  "step": 5456
183
  },
184
  {
 
191
  {
192
  "epoch": 12.0,
193
  "eval_loss": 3.103127956390381,
194
+ "eval_runtime": 19.9635,
195
+ "eval_samples_per_second": 1594.759,
196
+ "eval_steps_per_second": 6.261,
197
  "step": 5952
198
  },
199
  {
 
206
  {
207
  "epoch": 13.0,
208
  "eval_loss": 3.1127891540527344,
209
+ "eval_runtime": 19.7496,
210
+ "eval_samples_per_second": 1612.03,
211
+ "eval_steps_per_second": 6.329,
212
  "step": 6448
213
  },
214
  {
 
221
  {
222
  "epoch": 14.0,
223
  "eval_loss": 3.121568202972412,
224
+ "eval_runtime": 19.7097,
225
+ "eval_samples_per_second": 1615.292,
226
+ "eval_steps_per_second": 6.342,
227
  "step": 6944
228
  },
229
  {
 
236
  {
237
  "epoch": 15.0,
238
  "eval_loss": 3.133429765701294,
239
+ "eval_runtime": 19.7554,
240
+ "eval_samples_per_second": 1611.559,
241
+ "eval_steps_per_second": 6.327,
242
  "step": 7440
243
  },
244
  {
 
251
  {
252
  "epoch": 16.0,
253
  "eval_loss": 3.145784616470337,
254
+ "eval_runtime": 19.9093,
255
+ "eval_samples_per_second": 1599.105,
256
+ "eval_steps_per_second": 6.278,
257
  "step": 7936
258
  },
259
  {
 
266
  {
267
  "epoch": 17.0,
268
  "eval_loss": 3.153964042663574,
269
+ "eval_runtime": 19.7382,
270
+ "eval_samples_per_second": 1612.963,
271
+ "eval_steps_per_second": 6.333,
272
  "step": 8432
273
  },
274
  {
 
281
  {
282
  "epoch": 18.0,
283
  "eval_loss": 3.1646533012390137,
284
+ "eval_runtime": 19.7147,
285
+ "eval_samples_per_second": 1614.886,
286
+ "eval_steps_per_second": 6.34,
287
  "step": 8928
288
  }
289
  ],
checkpoint-9000/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9c3935421c77a2c0993b65276d4eac518a2796b439d9c09991132ba490295309
3
  size 5905
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c0beaf4c8a5b7d39410a27bab89f903a80b445e9952ea5d058c24d8d31577fc
3
  size 5905