ChiefTheLord committed on
Commit
ecc4149
·
verified ·
1 Parent(s): 6ee11e7

Delete checkpoints

Browse files
checkpoints/checkpoint-4096/eval_state.json DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:bc4e6e53d79ff3aedd2076033ebd6c3edcb0130907a36c6f7d028a31d6114c96
3
- size 60022890
 
 
 
 
checkpoints/checkpoint-4096/model.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:7316308999e0576e1197d0480d698fe3642bfb1e63da9656548fbbab1c76436c
3
- size 41874064
 
 
 
 
checkpoints/checkpoint-4096/optimizer.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:d389ffc3ac0cc293757ff3ab71841498ad149adde37936f91160e0edf606df07
3
- size 629387
 
 
 
 
checkpoints/checkpoint-4096/rng_state.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:0e042085829a754d6255098127897d741a45f5dabe2edbbbc150188dd69fb7a1
3
- size 14645
 
 
 
 
checkpoints/checkpoint-4096/scaler.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:90078bef0ff08e77712ec862bc2a11b4989d3477480b20822129904ef078a3a3
3
- size 1383
 
 
 
 
checkpoints/checkpoint-4096/scheduler.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:497950b7b89ed8d9cbfd38b9fbf5cb40dbb171f51668b6899f54e1890cc9a037
3
- size 1465
 
 
 
 
checkpoints/checkpoint-4096/trainer_state.json DELETED
@@ -1,230 +0,0 @@
1
- {
2
- "best_global_step": null,
3
- "best_metric": null,
4
- "best_model_checkpoint": null,
5
- "epoch": 0.18918294766985358,
6
- "eval_steps": 1024,
7
- "global_step": 4096,
8
- "is_hyper_param_search": false,
9
- "is_local_process_zero": true,
10
- "is_world_process_zero": true,
11
- "log_history": [
12
- {
13
- "epoch": 0.011823934229365849,
14
- "grad_norm": 1.7147449254989624,
15
- "learning_rate": 3.923076923076923e-06,
16
- "loss": 10.7188,
17
- "step": 256
18
- },
19
- {
20
- "epoch": 0.023647868458731697,
21
- "grad_norm": 1.359258770942688,
22
- "learning_rate": 7.861538461538463e-06,
23
- "loss": 9.5087,
24
- "step": 512
25
- },
26
- {
27
- "epoch": 0.03547180268809755,
28
- "grad_norm": 1.3955016136169434,
29
- "learning_rate": 9.999234191043789e-06,
30
- "loss": 7.9841,
31
- "step": 768
32
- },
33
- {
34
- "epoch": 0.047295736917463395,
35
- "grad_norm": 1.3613098859786987,
36
- "learning_rate": 9.992218473755348e-06,
37
- "loss": 6.9189,
38
- "step": 1024
39
- },
40
- {
41
- "epoch": 0.047295736917463395,
42
- "eval_bleu": 0.3808388870684137,
43
- "eval_ce_loss": 6.020909021978509,
44
- "eval_cos_loss": 0.9515757523987391,
45
- "eval_loss": 6.402040627449071,
46
- "eval_mse_loss": 1.913177224599063,
47
- "step": 1024
48
- },
49
- {
50
- "epoch": 0.047295736917463395,
51
- "eval_bleu": 0.3808388870684137,
52
- "eval_ce_loss": 6.020909021978509,
53
- "eval_cos_loss": 0.9515757523987391,
54
- "eval_loss": 6.402040627449071,
55
- "eval_mse_loss": 1.913177224599063,
56
- "eval_runtime": 210.9993,
57
- "eval_samples_per_second": 132.669,
58
- "eval_steps_per_second": 2.076,
59
- "step": 1024
60
- },
61
- {
62
- "epoch": 0.05911967114682925,
63
- "grad_norm": 1.3006930351257324,
64
- "learning_rate": 9.977882265113598e-06,
65
- "loss": 6.1178,
66
- "step": 1280
67
- },
68
- {
69
- "epoch": 0.0709436053761951,
70
- "grad_norm": 1.1683905124664307,
71
- "learning_rate": 9.956246587453995e-06,
72
- "loss": 5.4671,
73
- "step": 1536
74
- },
75
- {
76
- "epoch": 0.08276753960556095,
77
- "grad_norm": 1.1791032552719116,
78
- "learning_rate": 9.927343166910327e-06,
79
- "loss": 4.8947,
80
- "step": 1792
81
- },
82
- {
83
- "epoch": 0.09459147383492679,
84
- "grad_norm": 1.0977956056594849,
85
- "learning_rate": 9.89121438689216e-06,
86
- "loss": 4.3986,
87
- "step": 2048
88
- },
89
- {
90
- "epoch": 0.09459147383492679,
91
- "eval_bleu": 0.5811025576610133,
92
- "eval_ce_loss": 3.6939951426362336,
93
- "eval_cos_loss": 0.9414656509275305,
94
- "eval_loss": 4.072019737605091,
95
- "eval_mse_loss": 1.9116978286063835,
96
- "step": 2048
97
- },
98
- {
99
- "epoch": 0.09459147383492679,
100
- "eval_bleu": 0.5811025576610133,
101
- "eval_ce_loss": 3.6939951426362336,
102
- "eval_cos_loss": 0.9414656509275305,
103
- "eval_loss": 4.072019737605091,
104
- "eval_mse_loss": 1.9116978286063835,
105
- "eval_runtime": 209.0738,
106
- "eval_samples_per_second": 133.891,
107
- "eval_steps_per_second": 2.095,
108
- "step": 2048
109
- },
110
- {
111
- "epoch": 0.10641540806429264,
112
- "grad_norm": 0.9964653253555298,
113
- "learning_rate": 9.847913225934602e-06,
114
- "loss": 3.9476,
115
- "step": 2304
116
- },
117
- {
118
- "epoch": 0.1182393422936585,
119
- "grad_norm": 0.9317086338996887,
120
- "learning_rate": 9.79750318001165e-06,
121
- "loss": 3.5352,
122
- "step": 2560
123
- },
124
- {
125
- "epoch": 0.13006327652302435,
126
- "grad_norm": 0.8047142624855042,
127
- "learning_rate": 9.740058169426925e-06,
128
- "loss": 3.1663,
129
- "step": 2816
130
- },
131
- {
132
- "epoch": 0.1418872107523902,
133
- "grad_norm": 0.7831476330757141,
134
- "learning_rate": 9.675662430418375e-06,
135
- "loss": 2.8456,
136
- "step": 3072
137
- },
138
- {
139
- "epoch": 0.1418872107523902,
140
- "eval_bleu": 0.6805246113782488,
141
- "eval_ce_loss": 2.2575207627527245,
142
- "eval_cos_loss": 0.9274487899590845,
143
- "eval_loss": 2.6308557383001667,
144
- "eval_mse_loss": 1.9020064984826737,
145
- "step": 3072
146
- },
147
- {
148
- "epoch": 0.1418872107523902,
149
- "eval_bleu": 0.6805246113782488,
150
- "eval_ce_loss": 2.2575207627527245,
151
- "eval_cos_loss": 0.9274487899590845,
152
- "eval_loss": 2.6308557383001667,
153
- "eval_mse_loss": 1.9020064984826737,
154
- "eval_runtime": 207.6482,
155
- "eval_samples_per_second": 134.81,
156
- "eval_steps_per_second": 2.109,
157
- "step": 3072
158
- },
159
- {
160
- "epoch": 0.15371114498175603,
161
- "grad_norm": 0.6621416211128235,
162
- "learning_rate": 9.604410391635927e-06,
163
- "loss": 2.577,
164
- "step": 3328
165
- },
166
- {
167
- "epoch": 0.1655350792111219,
168
- "grad_norm": 0.5996536612510681,
169
- "learning_rate": 9.526406535673152e-06,
170
- "loss": 2.3395,
171
- "step": 3584
172
- },
173
- {
174
- "epoch": 0.17735901344048774,
175
- "grad_norm": 0.5423814654350281,
176
- "learning_rate": 9.441765245856039e-06,
177
- "loss": 2.1347,
178
- "step": 3840
179
- },
180
- {
181
- "epoch": 0.18918294766985358,
182
- "grad_norm": 0.5300129652023315,
183
- "learning_rate": 9.350610638513531e-06,
184
- "loss": 1.9651,
185
- "step": 4096
186
- },
187
- {
188
- "epoch": 0.18918294766985358,
189
- "eval_bleu": 0.7344211230524479,
190
- "eval_ce_loss": 1.4648735074147785,
191
- "eval_cos_loss": 0.8980057545720714,
192
- "eval_loss": 1.8271746014895505,
193
- "eval_mse_loss": 1.85798714912101,
194
- "step": 4096
195
- },
196
- {
197
- "epoch": 0.18918294766985358,
198
- "eval_bleu": 0.7344211230524479,
199
- "eval_ce_loss": 1.4648735074147785,
200
- "eval_cos_loss": 0.8980057545720714,
201
- "eval_loss": 1.8271746014895505,
202
- "eval_mse_loss": 1.85798714912101,
203
- "eval_runtime": 209.1885,
204
- "eval_samples_per_second": 133.817,
205
- "eval_steps_per_second": 2.094,
206
- "step": 4096
207
- }
208
- ],
209
- "logging_steps": 256,
210
- "max_steps": 21651,
211
- "num_input_tokens_seen": 0,
212
- "num_train_epochs": 1,
213
- "save_steps": 1024,
214
- "stateful_callbacks": {
215
- "TrainerControl": {
216
- "args": {
217
- "should_epoch_stop": false,
218
- "should_evaluate": false,
219
- "should_log": false,
220
- "should_save": true,
221
- "should_training_stop": false
222
- },
223
- "attributes": {}
224
- }
225
- },
226
- "total_flos": 0.0,
227
- "train_batch_size": 64,
228
- "trial_name": null,
229
- "trial_params": null
230
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoints/checkpoint-4096/training_args.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:3053fe2b91fff7d931fb6a672ec144ae6add9e4dea009f57cf94be88b3f78e85
3
- size 5777