iko-01 committed on
Commit
eb9870f
·
verified ·
1 Parent(s): 36988a8

رفع النموذج النهائي مع جميع الملفات للتجربة أو إعادة التدريب

Browse files
config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "activation_function": "gelu_new",
3
+ "architectures": [
4
+ "GPT2LMHeadModel"
5
+ ],
6
+ "attn_pdrop": 0.1,
7
+ "bos_token_id": 50256,
8
+ "embd_pdrop": 0.1,
9
+ "eos_token_id": 50256,
10
+ "initializer_range": 0.02,
11
+ "layer_norm_epsilon": 1e-05,
12
+ "model_type": "gpt2",
13
+ "n_ctx": 1024,
14
+ "n_embd": 768,
15
+ "n_head": 12,
16
+ "n_inner": null,
17
+ "n_layer": 12,
18
+ "n_positions": 1024,
19
+ "reorder_and_upcast_attn": false,
20
+ "resid_pdrop": 0.1,
21
+ "scale_attn_by_inverse_layer_idx": false,
22
+ "scale_attn_weights": true,
23
+ "summary_activation": null,
24
+ "summary_first_dropout": 0.1,
25
+ "summary_proj_to_labels": true,
26
+ "summary_type": "cls_index",
27
+ "summary_use_proj": true,
28
+ "task_specific_params": {
29
+ "text-generation": {
30
+ "do_sample": true,
31
+ "max_length": 50
32
+ }
33
+ },
34
+ "torch_dtype": "float32",
35
+ "transformers_version": "4.55.0",
36
+ "use_cache": true,
37
+ "vocab_size": 50257
38
+ }
generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 50256,
4
+ "eos_token_id": 50256,
5
+ "transformers_version": "4.55.0"
6
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:92218500c68d47709dea98a6fcc11fa80c399fe50957ce26a824de09c5e7e153
3
+ size 497774208
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7a2af08115a6eac10941f5f4a788b93b0d8c232f5461ca05d705ba1406736497
3
+ size 995642298
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:31837c6fe9da0a9813b8f58185410fe60d9dcba9a3b247aef3ca0eae0e27f2f4
3
+ size 14244
scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:508cc0bc85eb5889d9764e66bd1d5f0a9a18a8654405d2b1f448efa09a778d04
3
+ size 988
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d6621079a5a6859a1c4c806b5242eb7f8c3cbe5a3586aff97195f238e2d41709
3
+ size 1064
special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<|endoftext|>",
4
+ "lstrip": false,
5
+ "normalized": true,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "<|endoftext|>",
11
+ "lstrip": false,
12
+ "normalized": true,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "<|endoftext|>",
18
+ "lstrip": false,
19
+ "normalized": true,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "unk_token": {
24
+ "content": "<|endoftext|>",
25
+ "lstrip": false,
26
+ "normalized": true,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ }
30
+ }
tokenizer_config.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": false,
3
+ "add_prefix_space": false,
4
+ "added_tokens_decoder": {
5
+ "50256": {
6
+ "content": "<|endoftext|>",
7
+ "lstrip": false,
8
+ "normalized": true,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ }
13
+ },
14
+ "bos_token": "<|endoftext|>",
15
+ "clean_up_tokenization_spaces": false,
16
+ "eos_token": "<|endoftext|>",
17
+ "errors": "replace",
18
+ "extra_special_tokens": {},
19
+ "model_max_length": 1024,
20
+ "pad_token": "<|endoftext|>",
21
+ "tokenizer_class": "GPT2Tokenizer",
22
+ "unk_token": "<|endoftext|>"
23
+ }
trainer_state.json ADDED
@@ -0,0 +1,282 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 1.0,
6
+ "eval_steps": 1000,
7
+ "global_step": 3251,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.03076804738279297,
14
+ "grad_norm": 0.4551459848880768,
15
+ "learning_rate": 4.75e-05,
16
+ "loss": 1.0461,
17
+ "step": 100
18
+ },
19
+ {
20
+ "epoch": 0.06153609476558594,
21
+ "grad_norm": 0.398708701133728,
22
+ "learning_rate": 4.849254205014281e-05,
23
+ "loss": 0.5103,
24
+ "step": 200
25
+ },
26
+ {
27
+ "epoch": 0.09230414214837891,
28
+ "grad_norm": 0.5458264946937561,
29
+ "learning_rate": 4.690574420818788e-05,
30
+ "loss": 0.5164,
31
+ "step": 300
32
+ },
33
+ {
34
+ "epoch": 0.12307218953117188,
35
+ "grad_norm": 0.4311501085758209,
36
+ "learning_rate": 4.5318946366232947e-05,
37
+ "loss": 0.5276,
38
+ "step": 400
39
+ },
40
+ {
41
+ "epoch": 0.15384023691396484,
42
+ "grad_norm": 0.38870149850845337,
43
+ "learning_rate": 4.373214852427801e-05,
44
+ "loss": 0.5037,
45
+ "step": 500
46
+ },
47
+ {
48
+ "epoch": 0.18460828429675782,
49
+ "grad_norm": 0.5005269050598145,
50
+ "learning_rate": 4.2145350682323075e-05,
51
+ "loss": 0.53,
52
+ "step": 600
53
+ },
54
+ {
55
+ "epoch": 0.21537633167955078,
56
+ "grad_norm": 0.389077365398407,
57
+ "learning_rate": 4.055855284036814e-05,
58
+ "loss": 0.5078,
59
+ "step": 700
60
+ },
61
+ {
62
+ "epoch": 0.24614437906234377,
63
+ "grad_norm": 0.44048070907592773,
64
+ "learning_rate": 3.8971754998413204e-05,
65
+ "loss": 0.522,
66
+ "step": 800
67
+ },
68
+ {
69
+ "epoch": 0.27691242644513675,
70
+ "grad_norm": 0.34244316816329956,
71
+ "learning_rate": 3.7384957156458266e-05,
72
+ "loss": 0.4913,
73
+ "step": 900
74
+ },
75
+ {
76
+ "epoch": 0.3076804738279297,
77
+ "grad_norm": 0.46572574973106384,
78
+ "learning_rate": 3.5798159314503333e-05,
79
+ "loss": 0.4939,
80
+ "step": 1000
81
+ },
82
+ {
83
+ "epoch": 0.3076804738279297,
84
+ "eval_loss": 0.3825926184654236,
85
+ "eval_runtime": 4.7315,
86
+ "eval_samples_per_second": 42.27,
87
+ "eval_steps_per_second": 21.135,
88
+ "step": 1000
89
+ },
90
+ {
91
+ "epoch": 0.33844852121072266,
92
+ "grad_norm": 0.4824311435222626,
93
+ "learning_rate": 3.4211361472548394e-05,
94
+ "loss": 0.5102,
95
+ "step": 1100
96
+ },
97
+ {
98
+ "epoch": 0.36921656859351565,
99
+ "grad_norm": 0.41817840933799744,
100
+ "learning_rate": 3.262456363059346e-05,
101
+ "loss": 0.519,
102
+ "step": 1200
103
+ },
104
+ {
105
+ "epoch": 0.3999846159763086,
106
+ "grad_norm": 0.33424732089042664,
107
+ "learning_rate": 3.103776578863853e-05,
108
+ "loss": 0.5107,
109
+ "step": 1300
110
+ },
111
+ {
112
+ "epoch": 0.43075266335910156,
113
+ "grad_norm": 0.5414207577705383,
114
+ "learning_rate": 2.9450967946683595e-05,
115
+ "loss": 0.5178,
116
+ "step": 1400
117
+ },
118
+ {
119
+ "epoch": 0.46152071074189455,
120
+ "grad_norm": 0.49227190017700195,
121
+ "learning_rate": 2.786417010472866e-05,
122
+ "loss": 0.5162,
123
+ "step": 1500
124
+ },
125
+ {
126
+ "epoch": 0.49228875812468753,
127
+ "grad_norm": 0.3693729043006897,
128
+ "learning_rate": 2.6277372262773724e-05,
129
+ "loss": 0.501,
130
+ "step": 1600
131
+ },
132
+ {
133
+ "epoch": 0.5230568055074805,
134
+ "grad_norm": 0.6255485415458679,
135
+ "learning_rate": 2.4690574420818788e-05,
136
+ "loss": 0.5097,
137
+ "step": 1700
138
+ },
139
+ {
140
+ "epoch": 0.5538248528902735,
141
+ "grad_norm": 0.4786062240600586,
142
+ "learning_rate": 2.3103776578863853e-05,
143
+ "loss": 0.5122,
144
+ "step": 1800
145
+ },
146
+ {
147
+ "epoch": 0.5845929002730664,
148
+ "grad_norm": 0.44093266129493713,
149
+ "learning_rate": 2.1516978736908917e-05,
150
+ "loss": 0.518,
151
+ "step": 1900
152
+ },
153
+ {
154
+ "epoch": 0.6153609476558594,
155
+ "grad_norm": 0.5277242064476013,
156
+ "learning_rate": 1.9930180894953985e-05,
157
+ "loss": 0.5087,
158
+ "step": 2000
159
+ },
160
+ {
161
+ "epoch": 0.6153609476558594,
162
+ "eval_loss": 0.37586718797683716,
163
+ "eval_runtime": 4.7269,
164
+ "eval_samples_per_second": 42.311,
165
+ "eval_steps_per_second": 21.155,
166
+ "step": 2000
167
+ },
168
+ {
169
+ "epoch": 0.6461289950386524,
170
+ "grad_norm": 0.3952804207801819,
171
+ "learning_rate": 1.834338305299905e-05,
172
+ "loss": 0.4978,
173
+ "step": 2100
174
+ },
175
+ {
176
+ "epoch": 0.6768970424214453,
177
+ "grad_norm": 0.49758800864219666,
178
+ "learning_rate": 1.6756585211044114e-05,
179
+ "loss": 0.524,
180
+ "step": 2200
181
+ },
182
+ {
183
+ "epoch": 0.7076650898042383,
184
+ "grad_norm": 0.3507545292377472,
185
+ "learning_rate": 1.5169787369089178e-05,
186
+ "loss": 0.5127,
187
+ "step": 2300
188
+ },
189
+ {
190
+ "epoch": 0.7384331371870313,
191
+ "grad_norm": 0.4394129514694214,
192
+ "learning_rate": 1.3582989527134243e-05,
193
+ "loss": 0.506,
194
+ "step": 2400
195
+ },
196
+ {
197
+ "epoch": 0.7692011845698242,
198
+ "grad_norm": 0.5479759573936462,
199
+ "learning_rate": 1.1996191685179309e-05,
200
+ "loss": 0.5175,
201
+ "step": 2500
202
+ },
203
+ {
204
+ "epoch": 0.7999692319526172,
205
+ "grad_norm": 0.3974688947200775,
206
+ "learning_rate": 1.0409393843224374e-05,
207
+ "loss": 0.544,
208
+ "step": 2600
209
+ },
210
+ {
211
+ "epoch": 0.8307372793354102,
212
+ "grad_norm": 0.4838125705718994,
213
+ "learning_rate": 8.822596001269438e-06,
214
+ "loss": 0.5323,
215
+ "step": 2700
216
+ },
217
+ {
218
+ "epoch": 0.8615053267182031,
219
+ "grad_norm": 0.3339136838912964,
220
+ "learning_rate": 7.235798159314504e-06,
221
+ "loss": 0.5321,
222
+ "step": 2800
223
+ },
224
+ {
225
+ "epoch": 0.8922733741009962,
226
+ "grad_norm": 0.3484480679035187,
227
+ "learning_rate": 5.649000317359569e-06,
228
+ "loss": 0.4897,
229
+ "step": 2900
230
+ },
231
+ {
232
+ "epoch": 0.9230414214837891,
233
+ "grad_norm": 0.377142995595932,
234
+ "learning_rate": 4.062202475404633e-06,
235
+ "loss": 0.5001,
236
+ "step": 3000
237
+ },
238
+ {
239
+ "epoch": 0.9230414214837891,
240
+ "eval_loss": 0.3712926208972931,
241
+ "eval_runtime": 4.7886,
242
+ "eval_samples_per_second": 41.766,
243
+ "eval_steps_per_second": 20.883,
244
+ "step": 3000
245
+ },
246
+ {
247
+ "epoch": 0.953809468866582,
248
+ "grad_norm": 0.4788321554660797,
249
+ "learning_rate": 2.4754046334496985e-06,
250
+ "loss": 0.5155,
251
+ "step": 3100
252
+ },
253
+ {
254
+ "epoch": 0.9845775162493751,
255
+ "grad_norm": 0.38534921407699585,
256
+ "learning_rate": 8.886067914947636e-07,
257
+ "loss": 0.5219,
258
+ "step": 3200
259
+ }
260
+ ],
261
+ "logging_steps": 100,
262
+ "max_steps": 3251,
263
+ "num_input_tokens_seen": 0,
264
+ "num_train_epochs": 1,
265
+ "save_steps": 1000,
266
+ "stateful_callbacks": {
267
+ "TrainerControl": {
268
+ "args": {
269
+ "should_epoch_stop": false,
270
+ "should_evaluate": false,
271
+ "should_log": false,
272
+ "should_save": true,
273
+ "should_training_stop": true
274
+ },
275
+ "attributes": {}
276
+ }
277
+ },
278
+ "total_flos": 1.3587708248064e+16,
279
+ "train_batch_size": 2,
280
+ "trial_name": null,
281
+ "trial_params": null
282
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2035e6cc8868b6ede6cf4b2fbbd70aee01dc9d6fb8781f8ac7125f0739741680
3
+ size 5304
vocab.json ADDED
The diff for this file is too large to render. See raw diff