Attila1011 commited on
Commit
6ee11e7
·
verified ·
1 Parent(s): a12653e

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ checkpoints/checkpoint-4096/eval_state.json filter=lfs diff=lfs merge=lfs -text
checkpoints/checkpoint-4096/eval_state.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc4e6e53d79ff3aedd2076033ebd6c3edcb0130907a36c6f7d028a31d6114c96
3
+ size 60022890
checkpoints/checkpoint-4096/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7316308999e0576e1197d0480d698fe3642bfb1e63da9656548fbbab1c76436c
3
+ size 41874064
checkpoints/checkpoint-4096/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d389ffc3ac0cc293757ff3ab71841498ad149adde37936f91160e0edf606df07
3
+ size 629387
checkpoints/checkpoint-4096/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e042085829a754d6255098127897d741a45f5dabe2edbbbc150188dd69fb7a1
3
+ size 14645
checkpoints/checkpoint-4096/scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:90078bef0ff08e77712ec862bc2a11b4989d3477480b20822129904ef078a3a3
3
+ size 1383
checkpoints/checkpoint-4096/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:497950b7b89ed8d9cbfd38b9fbf5cb40dbb171f51668b6899f54e1890cc9a037
3
+ size 1465
checkpoints/checkpoint-4096/trainer_state.json ADDED
@@ -0,0 +1,230 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 0.18918294766985358,
6
+ "eval_steps": 1024,
7
+ "global_step": 4096,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.011823934229365849,
14
+ "grad_norm": 1.7147449254989624,
15
+ "learning_rate": 3.923076923076923e-06,
16
+ "loss": 10.7188,
17
+ "step": 256
18
+ },
19
+ {
20
+ "epoch": 0.023647868458731697,
21
+ "grad_norm": 1.359258770942688,
22
+ "learning_rate": 7.861538461538463e-06,
23
+ "loss": 9.5087,
24
+ "step": 512
25
+ },
26
+ {
27
+ "epoch": 0.03547180268809755,
28
+ "grad_norm": 1.3955016136169434,
29
+ "learning_rate": 9.999234191043789e-06,
30
+ "loss": 7.9841,
31
+ "step": 768
32
+ },
33
+ {
34
+ "epoch": 0.047295736917463395,
35
+ "grad_norm": 1.3613098859786987,
36
+ "learning_rate": 9.992218473755348e-06,
37
+ "loss": 6.9189,
38
+ "step": 1024
39
+ },
40
+ {
41
+ "epoch": 0.047295736917463395,
42
+ "eval_bleu": 0.3808388870684137,
43
+ "eval_ce_loss": 6.020909021978509,
44
+ "eval_cos_loss": 0.9515757523987391,
45
+ "eval_loss": 6.402040627449071,
46
+ "eval_mse_loss": 1.913177224599063,
47
+ "step": 1024
48
+ },
49
+ {
50
+ "epoch": 0.047295736917463395,
51
+ "eval_bleu": 0.3808388870684137,
52
+ "eval_ce_loss": 6.020909021978509,
53
+ "eval_cos_loss": 0.9515757523987391,
54
+ "eval_loss": 6.402040627449071,
55
+ "eval_mse_loss": 1.913177224599063,
56
+ "eval_runtime": 210.9993,
57
+ "eval_samples_per_second": 132.669,
58
+ "eval_steps_per_second": 2.076,
59
+ "step": 1024
60
+ },
61
+ {
62
+ "epoch": 0.05911967114682925,
63
+ "grad_norm": 1.3006930351257324,
64
+ "learning_rate": 9.977882265113598e-06,
65
+ "loss": 6.1178,
66
+ "step": 1280
67
+ },
68
+ {
69
+ "epoch": 0.0709436053761951,
70
+ "grad_norm": 1.1683905124664307,
71
+ "learning_rate": 9.956246587453995e-06,
72
+ "loss": 5.4671,
73
+ "step": 1536
74
+ },
75
+ {
76
+ "epoch": 0.08276753960556095,
77
+ "grad_norm": 1.1791032552719116,
78
+ "learning_rate": 9.927343166910327e-06,
79
+ "loss": 4.8947,
80
+ "step": 1792
81
+ },
82
+ {
83
+ "epoch": 0.09459147383492679,
84
+ "grad_norm": 1.0977956056594849,
85
+ "learning_rate": 9.89121438689216e-06,
86
+ "loss": 4.3986,
87
+ "step": 2048
88
+ },
89
+ {
90
+ "epoch": 0.09459147383492679,
91
+ "eval_bleu": 0.5811025576610133,
92
+ "eval_ce_loss": 3.6939951426362336,
93
+ "eval_cos_loss": 0.9414656509275305,
94
+ "eval_loss": 4.072019737605091,
95
+ "eval_mse_loss": 1.9116978286063835,
96
+ "step": 2048
97
+ },
98
+ {
99
+ "epoch": 0.09459147383492679,
100
+ "eval_bleu": 0.5811025576610133,
101
+ "eval_ce_loss": 3.6939951426362336,
102
+ "eval_cos_loss": 0.9414656509275305,
103
+ "eval_loss": 4.072019737605091,
104
+ "eval_mse_loss": 1.9116978286063835,
105
+ "eval_runtime": 209.0738,
106
+ "eval_samples_per_second": 133.891,
107
+ "eval_steps_per_second": 2.095,
108
+ "step": 2048
109
+ },
110
+ {
111
+ "epoch": 0.10641540806429264,
112
+ "grad_norm": 0.9964653253555298,
113
+ "learning_rate": 9.847913225934602e-06,
114
+ "loss": 3.9476,
115
+ "step": 2304
116
+ },
117
+ {
118
+ "epoch": 0.1182393422936585,
119
+ "grad_norm": 0.9317086338996887,
120
+ "learning_rate": 9.79750318001165e-06,
121
+ "loss": 3.5352,
122
+ "step": 2560
123
+ },
124
+ {
125
+ "epoch": 0.13006327652302435,
126
+ "grad_norm": 0.8047142624855042,
127
+ "learning_rate": 9.740058169426925e-06,
128
+ "loss": 3.1663,
129
+ "step": 2816
130
+ },
131
+ {
132
+ "epoch": 0.1418872107523902,
133
+ "grad_norm": 0.7831476330757141,
134
+ "learning_rate": 9.675662430418375e-06,
135
+ "loss": 2.8456,
136
+ "step": 3072
137
+ },
138
+ {
139
+ "epoch": 0.1418872107523902,
140
+ "eval_bleu": 0.6805246113782488,
141
+ "eval_ce_loss": 2.2575207627527245,
142
+ "eval_cos_loss": 0.9274487899590845,
143
+ "eval_loss": 2.6308557383001667,
144
+ "eval_mse_loss": 1.9020064984826737,
145
+ "step": 3072
146
+ },
147
+ {
148
+ "epoch": 0.1418872107523902,
149
+ "eval_bleu": 0.6805246113782488,
150
+ "eval_ce_loss": 2.2575207627527245,
151
+ "eval_cos_loss": 0.9274487899590845,
152
+ "eval_loss": 2.6308557383001667,
153
+ "eval_mse_loss": 1.9020064984826737,
154
+ "eval_runtime": 207.6482,
155
+ "eval_samples_per_second": 134.81,
156
+ "eval_steps_per_second": 2.109,
157
+ "step": 3072
158
+ },
159
+ {
160
+ "epoch": 0.15371114498175603,
161
+ "grad_norm": 0.6621416211128235,
162
+ "learning_rate": 9.604410391635927e-06,
163
+ "loss": 2.577,
164
+ "step": 3328
165
+ },
166
+ {
167
+ "epoch": 0.1655350792111219,
168
+ "grad_norm": 0.5996536612510681,
169
+ "learning_rate": 9.526406535673152e-06,
170
+ "loss": 2.3395,
171
+ "step": 3584
172
+ },
173
+ {
174
+ "epoch": 0.17735901344048774,
175
+ "grad_norm": 0.5423814654350281,
176
+ "learning_rate": 9.441765245856039e-06,
177
+ "loss": 2.1347,
178
+ "step": 3840
179
+ },
180
+ {
181
+ "epoch": 0.18918294766985358,
182
+ "grad_norm": 0.5300129652023315,
183
+ "learning_rate": 9.350610638513531e-06,
184
+ "loss": 1.9651,
185
+ "step": 4096
186
+ },
187
+ {
188
+ "epoch": 0.18918294766985358,
189
+ "eval_bleu": 0.7344211230524479,
190
+ "eval_ce_loss": 1.4648735074147785,
191
+ "eval_cos_loss": 0.8980057545720714,
192
+ "eval_loss": 1.8271746014895505,
193
+ "eval_mse_loss": 1.85798714912101,
194
+ "step": 4096
195
+ },
196
+ {
197
+ "epoch": 0.18918294766985358,
198
+ "eval_bleu": 0.7344211230524479,
199
+ "eval_ce_loss": 1.4648735074147785,
200
+ "eval_cos_loss": 0.8980057545720714,
201
+ "eval_loss": 1.8271746014895505,
202
+ "eval_mse_loss": 1.85798714912101,
203
+ "eval_runtime": 209.1885,
204
+ "eval_samples_per_second": 133.817,
205
+ "eval_steps_per_second": 2.094,
206
+ "step": 4096
207
+ }
208
+ ],
209
+ "logging_steps": 256,
210
+ "max_steps": 21651,
211
+ "num_input_tokens_seen": 0,
212
+ "num_train_epochs": 1,
213
+ "save_steps": 1024,
214
+ "stateful_callbacks": {
215
+ "TrainerControl": {
216
+ "args": {
217
+ "should_epoch_stop": false,
218
+ "should_evaluate": false,
219
+ "should_log": false,
220
+ "should_save": true,
221
+ "should_training_stop": false
222
+ },
223
+ "attributes": {}
224
+ }
225
+ },
226
+ "total_flos": 0.0,
227
+ "train_batch_size": 64,
228
+ "trial_name": null,
229
+ "trial_params": null
230
+ }
checkpoints/checkpoint-4096/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3053fe2b91fff7d931fb6a672ec144ae6add9e4dea009f57cf94be88b3f78e85
3
+ size 5777