ChiefTheLord commited on
Commit
44be964
·
verified ·
1 Parent(s): d729989

Delete checkpoints/checkpoint-306

Browse files
checkpoints/checkpoint-306/adapter.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:705b77f4f5fc3f2207f1ea7c7637f4091f6443783fc9b894788e32de66bc6a04
3
- size 6439640
 
 
 
 
checkpoints/checkpoint-306/optimizer.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:ddd315c5e0840b81e818dd5ff765d8de5e5d04b7b6a9cdf05799e5bfbd7c0ce1
3
- size 3304962
 
 
 
 
checkpoints/checkpoint-306/rng_state.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:402fa8046bb84d6ccced5bc526160e4d722440748b38fcb0b6cdf48297e39aa6
3
- size 14244
 
 
 
 
checkpoints/checkpoint-306/scheduler.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:cde0307485e9704120944cc03273844cb67ca42817c760970d1e4ff5341cf1d0
3
- size 1064
 
 
 
 
checkpoints/checkpoint-306/trainer_state.json DELETED
@@ -1,280 +0,0 @@
1
- {
2
- "best_metric": null,
3
- "best_model_checkpoint": null,
4
- "epoch": 6.0,
5
- "eval_steps": 500,
6
- "global_step": 306,
7
- "is_hyper_param_search": false,
8
- "is_local_process_zero": true,
9
- "is_world_process_zero": true,
10
- "log_history": [
11
- {
12
- "epoch": 0.3137254901960784,
13
- "grad_norm": 21.246755599975586,
14
- "learning_rate": 2.2580645161290322e-07,
15
- "loss": 10.64,
16
- "step": 16
17
- },
18
- {
19
- "epoch": 0.6274509803921569,
20
- "grad_norm": 22.127361297607422,
21
- "learning_rate": 4.677419354838709e-07,
22
- "loss": 10.6241,
23
- "step": 32
24
- },
25
- {
26
- "epoch": 0.9411764705882353,
27
- "grad_norm": 20.265277862548828,
28
- "learning_rate": 7.258064516129032e-07,
29
- "loss": 10.5868,
30
- "step": 48
31
- },
32
- {
33
- "epoch": 1.0,
34
- "eval_bleu": 0.011208247430062485,
35
- "eval_cap_loss": 4.461696884211372,
36
- "eval_con_loss": 2.0591132453843657,
37
- "eval_loss": 6.349074924693388,
38
- "step": 51
39
- },
40
- {
41
- "epoch": 1.0,
42
- "eval_bleu": 0.011208247430062485,
43
- "eval_cap_loss": 4.461696884211372,
44
- "eval_con_loss": 2.0591132453843657,
45
- "eval_loss": 6.349074924693388,
46
- "eval_runtime": 31.4,
47
- "eval_samples_per_second": 25.764,
48
- "eval_steps_per_second": 3.248,
49
- "step": 51
50
- },
51
- {
52
- "epoch": 1.2549019607843137,
53
- "grad_norm": 20.630815505981445,
54
- "learning_rate": 9.838709677419355e-07,
55
- "loss": 10.3883,
56
- "step": 64
57
- },
58
- {
59
- "epoch": 1.5686274509803921,
60
- "grad_norm": 20.142724990844727,
61
- "learning_rate": 9.918989757867583e-07,
62
- "loss": 10.4571,
63
- "step": 80
64
- },
65
- {
66
- "epoch": 1.8823529411764706,
67
- "grad_norm": 17.457569122314453,
68
- "learning_rate": 9.631619841257474e-07,
69
- "loss": 10.3837,
70
- "step": 96
71
- },
72
- {
73
- "epoch": 2.0,
74
- "eval_bleu": 0.01885383457992918,
75
- "eval_cap_loss": 3.9823871734095553,
76
- "eval_con_loss": 2.0591144795511283,
77
- "eval_loss": 6.109422552819345,
78
- "step": 102
79
- },
80
- {
81
- "epoch": 2.0,
82
- "eval_bleu": 0.01885383457992918,
83
- "eval_cap_loss": 3.9823871734095553,
84
- "eval_con_loss": 2.0591144795511283,
85
- "eval_loss": 6.109422552819345,
86
- "eval_runtime": 31.2828,
87
- "eval_samples_per_second": 25.861,
88
- "eval_steps_per_second": 3.261,
89
- "step": 102
90
- },
91
- {
92
- "epoch": 2.196078431372549,
93
- "grad_norm": 14.928309440612793,
94
- "learning_rate": 9.183978228987435e-07,
95
- "loss": 10.2051,
96
- "step": 112
97
- },
98
- {
99
- "epoch": 2.5098039215686274,
100
- "grad_norm": 15.862812995910645,
101
- "learning_rate": 8.535533905932737e-07,
102
- "loss": 10.256,
103
- "step": 128
104
- },
105
- {
106
- "epoch": 2.8235294117647056,
107
- "grad_norm": 13.910375595092773,
108
- "learning_rate": 7.737576698351878e-07,
109
- "loss": 10.2118,
110
- "step": 144
111
- },
112
- {
113
- "epoch": 3.0,
114
- "eval_bleu": 0.09867147788444039,
115
- "eval_cap_loss": 3.6369867652070287,
116
- "eval_con_loss": 2.059116457022873,
117
- "eval_loss": 5.936726282624638,
118
- "step": 153
119
- },
120
- {
121
- "epoch": 3.0,
122
- "eval_bleu": 0.09867147788444039,
123
- "eval_cap_loss": 3.6369867652070287,
124
- "eval_con_loss": 2.059116457022873,
125
- "eval_loss": 5.936726282624638,
126
- "eval_runtime": 31.5123,
127
- "eval_samples_per_second": 25.673,
128
- "eval_steps_per_second": 3.237,
129
- "step": 153
130
- },
131
- {
132
- "epoch": 3.1372549019607843,
133
- "grad_norm": 13.83757209777832,
134
- "learning_rate": 6.823851123199893e-07,
135
- "loss": 10.0243,
136
- "step": 160
137
- },
138
- {
139
- "epoch": 3.450980392156863,
140
- "grad_norm": 12.985786437988281,
141
- "learning_rate": 5.832997383071659e-07,
142
- "loss": 10.1282,
143
- "step": 176
144
- },
145
- {
146
- "epoch": 3.764705882352941,
147
- "grad_norm": 13.04931926727295,
148
- "learning_rate": 4.806917324978607e-07,
149
- "loss": 10.0797,
150
- "step": 192
151
- },
152
- {
153
- "epoch": 4.0,
154
- "eval_bleu": 0.12895394150561787,
155
- "eval_cap_loss": 3.4135348516352035,
156
- "eval_con_loss": 2.0591169128230975,
157
- "eval_loss": 5.825001257307389,
158
- "step": 204
159
- },
160
- {
161
- "epoch": 4.0,
162
- "eval_bleu": 0.12895394150561787,
163
- "eval_cap_loss": 3.4135348516352035,
164
- "eval_con_loss": 2.0591169128230975,
165
- "eval_loss": 5.825001257307389,
166
- "eval_runtime": 31.5247,
167
- "eval_samples_per_second": 25.662,
168
- "eval_steps_per_second": 3.236,
169
- "step": 204
170
- },
171
- {
172
- "epoch": 4.078431372549019,
173
- "grad_norm": 13.757915496826172,
174
- "learning_rate": 3.7890024686521205e-07,
175
- "loss": 9.9267,
176
- "step": 208
177
- },
178
- {
179
- "epoch": 4.392156862745098,
180
- "grad_norm": 14.317400932312012,
181
- "learning_rate": 2.8222990386287614e-07,
182
- "loss": 10.0335,
183
- "step": 224
184
- },
185
- {
186
- "epoch": 4.705882352941177,
187
- "grad_norm": 11.822975158691406,
188
- "learning_rate": 1.947687598396154e-07,
189
- "loss": 10.0163,
190
- "step": 240
191
- },
192
- {
193
- "epoch": 5.0,
194
- "eval_bleu": 0.13267291344127094,
195
- "eval_cap_loss": 3.3223345349816715,
196
- "eval_con_loss": 2.0591166206434663,
197
- "eval_loss": 5.779400519296234,
198
- "step": 255
199
- },
200
- {
201
- "epoch": 5.0,
202
- "eval_bleu": 0.13267291344127094,
203
- "eval_cap_loss": 3.3223345349816715,
204
- "eval_con_loss": 2.0591166206434663,
205
- "eval_loss": 5.779400519296234,
206
- "eval_runtime": 31.0795,
207
- "eval_samples_per_second": 26.03,
208
- "eval_steps_per_second": 3.282,
209
- "step": 255
210
- },
211
- {
212
- "epoch": 5.019607843137255,
213
- "grad_norm": 14.300347328186035,
214
- "learning_rate": 1.2021542673772584e-07,
215
- "loss": 9.8869,
216
- "step": 256
217
- },
218
- {
219
- "epoch": 5.333333333333333,
220
- "grad_norm": 11.969139099121094,
221
- "learning_rate": 6.172266286180161e-08,
222
- "loss": 10.0042,
223
- "step": 272
224
- },
225
- {
226
- "epoch": 5.647058823529412,
227
- "grad_norm": 12.140946388244629,
228
- "learning_rate": 2.1764047049913525e-08,
229
- "loss": 10.0031,
230
- "step": 288
231
- },
232
- {
233
- "epoch": 5.96078431372549,
234
- "grad_norm": 12.373481750488281,
235
- "learning_rate": 2.029374413821949e-09,
236
- "loss": 9.9969,
237
- "step": 304
238
- },
239
- {
240
- "epoch": 6.0,
241
- "eval_bleu": 0.13520814810096016,
242
- "eval_cap_loss": 3.3063340187072754,
243
- "eval_con_loss": 2.059116517796236,
244
- "eval_loss": 5.771400057802014,
245
- "step": 306
246
- },
247
- {
248
- "epoch": 6.0,
249
- "eval_bleu": 0.13520814810096016,
250
- "eval_cap_loss": 3.3063340187072754,
251
- "eval_con_loss": 2.059116517796236,
252
- "eval_loss": 5.771400057802014,
253
- "eval_runtime": 31.1282,
254
- "eval_samples_per_second": 25.989,
255
- "eval_steps_per_second": 3.277,
256
- "step": 306
257
- }
258
- ],
259
- "logging_steps": 16,
260
- "max_steps": 306,
261
- "num_input_tokens_seen": 0,
262
- "num_train_epochs": 6,
263
- "save_steps": 500,
264
- "stateful_callbacks": {
265
- "TrainerControl": {
266
- "args": {
267
- "should_epoch_stop": false,
268
- "should_evaluate": false,
269
- "should_log": false,
270
- "should_save": true,
271
- "should_training_stop": true
272
- },
273
- "attributes": {}
274
- }
275
- },
276
- "total_flos": 0.0,
277
- "train_batch_size": 64,
278
- "trial_name": null,
279
- "trial_params": null
280
- }