Ewel committed on
Commit
f8c82f4
·
verified ·
1 Parent(s): e8427b4

Delete checkpoint-1100

Browse files
checkpoint-1100/model.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:95d2da0b49dadfb0c2e16d8675e186f56db1db6b89e096c8c72dc2fb3933d1aa
3
- size 1112204984
 
 
 
 
checkpoint-1100/optimizer.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:30c661c2016fbe6ab525e0d8b2c4af79d97bed930976bd1547877e1add79f06b
3
- size 2219803258
 
 
 
 
checkpoint-1100/rng_state.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:73ad35afe7245582c3a30c2d911f786a320ba177c1efc7548bb163ec485239fa
3
- size 14244
 
 
 
 
checkpoint-1100/scheduler.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:e224eb97d58d959997cdc13c3d0c1c5045788254f92e7f75adbd5a9690a9db11
3
- size 1064
 
 
 
 
checkpoint-1100/sentencepiece.bpe.model DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
3
- size 5069051
 
 
 
 
checkpoint-1100/special_tokens_map.json DELETED
@@ -1,51 +0,0 @@
1
- {
2
- "bos_token": {
3
- "content": "<s>",
4
- "lstrip": false,
5
- "normalized": false,
6
- "rstrip": false,
7
- "single_word": false
8
- },
9
- "cls_token": {
10
- "content": "<s>",
11
- "lstrip": false,
12
- "normalized": false,
13
- "rstrip": false,
14
- "single_word": false
15
- },
16
- "eos_token": {
17
- "content": "</s>",
18
- "lstrip": false,
19
- "normalized": false,
20
- "rstrip": false,
21
- "single_word": false
22
- },
23
- "mask_token": {
24
- "content": "<mask>",
25
- "lstrip": true,
26
- "normalized": false,
27
- "rstrip": false,
28
- "single_word": false
29
- },
30
- "pad_token": {
31
- "content": "<pad>",
32
- "lstrip": false,
33
- "normalized": false,
34
- "rstrip": false,
35
- "single_word": false
36
- },
37
- "sep_token": {
38
- "content": "</s>",
39
- "lstrip": false,
40
- "normalized": false,
41
- "rstrip": false,
42
- "single_word": false
43
- },
44
- "unk_token": {
45
- "content": "<unk>",
46
- "lstrip": false,
47
- "normalized": false,
48
- "rstrip": false,
49
- "single_word": false
50
- }
51
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoint-1100/tokenizer.json DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:3ffb37461c391f096759f4a9bbbc329da0f36952f88bab061fcf84940c022e98
3
- size 17082999
 
 
 
 
checkpoint-1100/tokenizer_config.json DELETED
@@ -1,62 +0,0 @@
1
- {
2
- "added_tokens_decoder": {
3
- "0": {
4
- "content": "<s>",
5
- "lstrip": false,
6
- "normalized": false,
7
- "rstrip": false,
8
- "single_word": false,
9
- "special": true
10
- },
11
- "1": {
12
- "content": "<pad>",
13
- "lstrip": false,
14
- "normalized": false,
15
- "rstrip": false,
16
- "single_word": false,
17
- "special": true
18
- },
19
- "2": {
20
- "content": "</s>",
21
- "lstrip": false,
22
- "normalized": false,
23
- "rstrip": false,
24
- "single_word": false,
25
- "special": true
26
- },
27
- "3": {
28
- "content": "<unk>",
29
- "lstrip": false,
30
- "normalized": false,
31
- "rstrip": false,
32
- "single_word": false,
33
- "special": true
34
- },
35
- "250001": {
36
- "content": "<mask>",
37
- "lstrip": true,
38
- "normalized": false,
39
- "rstrip": false,
40
- "single_word": false,
41
- "special": true
42
- }
43
- },
44
- "bos_token": "<s>",
45
- "clean_up_tokenization_spaces": false,
46
- "cls_token": "<s>",
47
- "eos_token": "</s>",
48
- "extra_special_tokens": {},
49
- "mask_token": "<mask>",
50
- "max_length": 128,
51
- "model_max_length": 512,
52
- "pad_to_multiple_of": null,
53
- "pad_token": "<pad>",
54
- "pad_token_type_id": 0,
55
- "padding_side": "right",
56
- "sep_token": "</s>",
57
- "stride": 0,
58
- "tokenizer_class": "XLMRobertaTokenizer",
59
- "truncation_side": "right",
60
- "truncation_strategy": "longest_first",
61
- "unk_token": "<unk>"
62
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoint-1100/trainer_state.json DELETED
@@ -1,229 +0,0 @@
1
- {
2
- "best_metric": 0.622703721149317,
3
- "best_model_checkpoint": "./trained_on_contrastive_encoder_10_epoch_question_freeze_0/checkpoint-1100",
4
- "epoch": 0.9981851179673321,
5
- "eval_steps": 100,
6
- "global_step": 1100,
7
- "is_hyper_param_search": false,
8
- "is_local_process_zero": true,
9
- "is_world_process_zero": true,
10
- "log_history": [
11
- {
12
- "epoch": 0.09074410163339383,
13
- "grad_norm": 4.864197254180908,
14
- "learning_rate": 9.903e-06,
15
- "loss": 0.706,
16
- "step": 100
17
- },
18
- {
19
- "epoch": 0.09074410163339383,
20
- "eval_accuracy": 0.8221413477342099,
21
- "eval_f1_pos": 0.0,
22
- "eval_loss": 0.6011108756065369,
23
- "eval_runtime": 31.8999,
24
- "eval_samples_per_second": 267.023,
25
- "eval_steps_per_second": 16.709,
26
- "step": 100
27
- },
28
- {
29
- "epoch": 0.18148820326678766,
30
- "grad_norm": 3.3921830654144287,
31
- "learning_rate": 9.804000000000001e-06,
32
- "loss": 0.6796,
33
- "step": 200
34
- },
35
- {
36
- "epoch": 0.18148820326678766,
37
- "eval_accuracy": 0.8084057290443766,
38
- "eval_f1_pos": 0.37757437070938216,
39
- "eval_loss": 0.5799795389175415,
40
- "eval_runtime": 32.9637,
41
- "eval_samples_per_second": 258.405,
42
- "eval_steps_per_second": 16.169,
43
- "step": 200
44
- },
45
- {
46
- "epoch": 0.27223230490018147,
47
- "grad_norm": 10.387075424194336,
48
- "learning_rate": 9.704e-06,
49
- "loss": 0.606,
50
- "step": 300
51
- },
52
- {
53
- "epoch": 0.27223230490018147,
54
- "eval_accuracy": 0.6152852782343273,
55
- "eval_f1_pos": 0.442592277598231,
56
- "eval_loss": 0.6913803219795227,
57
- "eval_runtime": 31.9515,
58
- "eval_samples_per_second": 266.591,
59
- "eval_steps_per_second": 16.682,
60
- "step": 300
61
- },
62
- {
63
- "epoch": 0.3629764065335753,
64
- "grad_norm": 17.31307601928711,
65
- "learning_rate": 9.604000000000002e-06,
66
- "loss": 0.5344,
67
- "step": 400
68
- },
69
- {
70
- "epoch": 0.3629764065335753,
71
- "eval_accuracy": 0.642286921812632,
72
- "eval_f1_pos": 0.4836468395187256,
73
- "eval_loss": 0.6743361353874207,
74
- "eval_runtime": 34.1976,
75
- "eval_samples_per_second": 249.082,
76
- "eval_steps_per_second": 15.586,
77
- "step": 400
78
- },
79
- {
80
- "epoch": 0.4537205081669691,
81
- "grad_norm": 14.772553443908691,
82
- "learning_rate": 9.504e-06,
83
- "loss": 0.5077,
84
- "step": 500
85
- },
86
- {
87
- "epoch": 0.4537205081669691,
88
- "eval_accuracy": 0.7073256633012445,
89
- "eval_f1_pos": 0.527751468081076,
90
- "eval_loss": 0.5418139696121216,
91
- "eval_runtime": 31.806,
92
- "eval_samples_per_second": 267.811,
93
- "eval_steps_per_second": 16.758,
94
- "step": 500
95
- },
96
- {
97
- "epoch": 0.5444646098003629,
98
- "grad_norm": 17.365032196044922,
99
- "learning_rate": 9.405e-06,
100
- "loss": 0.4533,
101
- "step": 600
102
- },
103
- {
104
- "epoch": 0.5444646098003629,
105
- "eval_accuracy": 0.7528762620333411,
106
- "eval_f1_pos": 0.572415193987406,
107
- "eval_loss": 0.5408028364181519,
108
- "eval_runtime": 33.2247,
109
- "eval_samples_per_second": 256.375,
110
- "eval_steps_per_second": 16.042,
111
- "step": 600
112
- },
113
- {
114
- "epoch": 0.6352087114337568,
115
- "grad_norm": 37.66470718383789,
116
- "learning_rate": 9.305000000000002e-06,
117
- "loss": 0.452,
118
- "step": 700
119
- },
120
- {
121
- "epoch": 0.6352087114337568,
122
- "eval_accuracy": 0.7457149565625734,
123
- "eval_f1_pos": 0.571089108910891,
124
- "eval_loss": 0.45808446407318115,
125
- "eval_runtime": 33.2944,
126
- "eval_samples_per_second": 255.839,
127
- "eval_steps_per_second": 16.009,
128
- "step": 700
129
- },
130
- {
131
- "epoch": 0.7259528130671506,
132
- "grad_norm": 28.712770462036133,
133
- "learning_rate": 9.205e-06,
134
- "loss": 0.4402,
135
- "step": 800
136
- },
137
- {
138
- "epoch": 0.7259528130671506,
139
- "eval_accuracy": 0.7641465132660249,
140
- "eval_f1_pos": 0.5853457172342621,
141
- "eval_loss": 0.4426897168159485,
142
- "eval_runtime": 32.2392,
143
- "eval_samples_per_second": 264.213,
144
- "eval_steps_per_second": 16.533,
145
- "step": 800
146
- },
147
- {
148
- "epoch": 0.8166969147005445,
149
- "grad_norm": 16.69965171813965,
150
- "learning_rate": 9.105e-06,
151
- "loss": 0.4197,
152
- "step": 900
153
- },
154
- {
155
- "epoch": 0.8166969147005445,
156
- "eval_accuracy": 0.7602723644047898,
157
- "eval_f1_pos": 0.5824130879345604,
158
- "eval_loss": 0.4644756317138672,
159
- "eval_runtime": 31.9357,
160
- "eval_samples_per_second": 266.724,
161
- "eval_steps_per_second": 16.69,
162
- "step": 900
163
- },
164
- {
165
- "epoch": 0.9074410163339383,
166
- "grad_norm": 29.23394203186035,
167
- "learning_rate": 9.005000000000001e-06,
168
- "loss": 0.4153,
169
- "step": 1000
170
- },
171
- {
172
- "epoch": 0.9074410163339383,
173
- "eval_accuracy": 0.7779995304061986,
174
- "eval_f1_pos": 0.5992795083704174,
175
- "eval_loss": 0.4298250675201416,
176
- "eval_runtime": 31.9166,
177
- "eval_samples_per_second": 266.883,
178
- "eval_steps_per_second": 16.7,
179
- "step": 1000
180
- },
181
- {
182
- "epoch": 0.9981851179673321,
183
- "grad_norm": 8.876638412475586,
184
- "learning_rate": 8.905e-06,
185
- "loss": 0.4162,
186
- "step": 1100
187
- },
188
- {
189
- "epoch": 0.9981851179673321,
190
- "eval_accuracy": 0.8119276825545902,
191
- "eval_f1_pos": 0.622703721149317,
192
- "eval_loss": 0.352287232875824,
193
- "eval_runtime": 31.9198,
194
- "eval_samples_per_second": 266.856,
195
- "eval_steps_per_second": 16.698,
196
- "step": 1100
197
- }
198
- ],
199
- "logging_steps": 100,
200
- "max_steps": 10000,
201
- "num_input_tokens_seen": 0,
202
- "num_train_epochs": 10,
203
- "save_steps": 100,
204
- "stateful_callbacks": {
205
- "EarlyStoppingCallback": {
206
- "args": {
207
- "early_stopping_patience": 2,
208
- "early_stopping_threshold": 0.0
209
- },
210
- "attributes": {
211
- "early_stopping_patience_counter": 0
212
- }
213
- },
214
- "TrainerControl": {
215
- "args": {
216
- "should_epoch_stop": false,
217
- "should_evaluate": false,
218
- "should_log": false,
219
- "should_save": true,
220
- "should_training_stop": false
221
- },
222
- "attributes": {}
223
- }
224
- },
225
- "total_flos": 0.0,
226
- "train_batch_size": 16,
227
- "trial_name": null,
228
- "trial_params": null
229
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoint-1100/training_args.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:15e0b4e07163290fc63734da0b9913f0b396eb3be141ebfbcf569bf53170e2ee
3
- size 5368