timo13113 committed on
Commit
566b065
·
verified ·
1 Parent(s): 6628fa2

Training in progress, step 100

Browse files
config.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "gpt2",
3
+ "activation_function": "gelu_new",
4
+ "architectures": [
5
+ "GPT2LMHeadModel"
6
+ ],
7
+ "attn_pdrop": 0.1,
8
+ "bos_token_id": 0,
9
+ "embd_pdrop": 0.1,
10
+ "eos_token_id": 0,
11
+ "initializer_range": 0.02,
12
+ "layer_norm_epsilon": 1e-05,
13
+ "model_type": "gpt2",
14
+ "n_ctx": 20,
15
+ "n_embd": 768,
16
+ "n_head": 12,
17
+ "n_inner": null,
18
+ "n_layer": 12,
19
+ "n_positions": 1024,
20
+ "reorder_and_upcast_attn": false,
21
+ "resid_pdrop": 0.1,
22
+ "scale_attn_by_inverse_layer_idx": false,
23
+ "scale_attn_weights": true,
24
+ "summary_activation": null,
25
+ "summary_first_dropout": 0.1,
26
+ "summary_proj_to_labels": true,
27
+ "summary_type": "cls_index",
28
+ "summary_use_proj": true,
29
+ "task_specific_params": {
30
+ "text-generation": {
31
+ "do_sample": true,
32
+ "max_length": 50
33
+ }
34
+ },
35
+ "torch_dtype": "float32",
36
+ "transformers_version": "4.35.2",
37
+ "use_cache": true,
38
+ "vocab_size": 100
39
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:13fbc4a8b5b8a8c8c0f5bb62651b39220452e9279f819675fbd2fbfda387ae5f
3
+ size 343691904
runs/Mar07_21-15-11_DESKTOP-C4VDTPF/events.out.tfevents.1709835327.DESKTOP-C4VDTPF.30740.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:00a0b9abd34b9c28676b5f477a01524ad2bdc8c62a28195d48c8a918aff3053f
3
+ size 8764
runs/Mar08_01-23-08_DESKTOP-C4VDTPF/events.out.tfevents.1709850193.DESKTOP-C4VDTPF.30740.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d82a9e29f3af749e84c46cf9f43e7ebcebdc0a861181672fc755ff05fd74290b
3
+ size 4774
runs/Mar08_01-24-03_DESKTOP-C4VDTPF/events.out.tfevents.1709850245.DESKTOP-C4VDTPF.30740.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7e3ebf911268ad104e3ef81f12eb510f1fc73e56c86508d982a695ef75c43194
3
+ size 4428
runs/Mar08_01-42-21_DESKTOP-C4VDTPF/events.out.tfevents.1709851344.DESKTOP-C4VDTPF.30740.3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:210bef5ccfe81ac491f6efa9cb83630bf143a21256064e9dffaddb25f1986861
3
+ size 5832
runs/Mar08_01-43-01_DESKTOP-C4VDTPF/events.out.tfevents.1709851384.DESKTOP-C4VDTPF.30740.4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:897c24d43f42913d6b7898daa656bc5f9e62ee22e00e45ef6d38d8311ce6ecb0
3
+ size 5260
special_tokens_map.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<|endoftext|>",
3
+ "eos_token": "<|endoftext|>",
4
+ "pad_token": "<|endoftext|>"
5
+ }
tokenizer.json ADDED
@@ -0,0 +1,241 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": "1.0",
3
+ "truncation": {
4
+ "direction": "Right",
5
+ "max_length": 20,
6
+ "strategy": "LongestFirst",
7
+ "stride": 0
8
+ },
9
+ "padding": {
10
+ "strategy": "BatchLongest",
11
+ "direction": "Right",
12
+ "pad_to_multiple_of": null,
13
+ "pad_id": 0,
14
+ "pad_type_id": 0,
15
+ "pad_token": "<|endoftext|>"
16
+ },
17
+ "added_tokens": [
18
+ {
19
+ "id": 0,
20
+ "content": "<|endoftext|>",
21
+ "single_word": false,
22
+ "lstrip": false,
23
+ "rstrip": false,
24
+ "normalized": false,
25
+ "special": true
26
+ }
27
+ ],
28
+ "normalizer": null,
29
+ "pre_tokenizer": {
30
+ "type": "Digits",
31
+ "individual_digits": true
32
+ },
33
+ "post_processor": {
34
+ "type": "ByteLevel",
35
+ "add_prefix_space": true,
36
+ "trim_offsets": false,
37
+ "use_regex": true
38
+ },
39
+ "decoder": {
40
+ "type": "ByteLevel",
41
+ "add_prefix_space": true,
42
+ "trim_offsets": true,
43
+ "use_regex": true
44
+ },
45
+ "model": {
46
+ "type": "BPE",
47
+ "dropout": null,
48
+ "unk_token": null,
49
+ "continuing_subword_prefix": null,
50
+ "end_of_word_suffix": null,
51
+ "fuse_unk": false,
52
+ "byte_fallback": false,
53
+ "vocab": {
54
+ "<|endoftext|>": 0,
55
+ "\n": 1,
56
+ "\r": 2,
57
+ "$": 3,
58
+ "+": 4,
59
+ "0": 5,
60
+ "1": 6,
61
+ "2": 7,
62
+ "3": 8,
63
+ "4": 9,
64
+ "5": 10,
65
+ "6": 11,
66
+ "7": 12,
67
+ "8": 13,
68
+ "9": 14,
69
+ ";": 15,
70
+ "=": 16,
71
+ "$$": 17,
72
+ ";=": 18,
73
+ "$=": 19,
74
+ "$+": 20,
75
+ "+;": 21,
76
+ "=+": 22,
77
+ "$;": 23,
78
+ ";;": 24,
79
+ "==": 25,
80
+ "++": 26,
81
+ ";$": 27,
82
+ ";+": 28,
83
+ "=;": 29,
84
+ "=$": 30,
85
+ "+$": 31,
86
+ "+=": 32,
87
+ ";$=": 33,
88
+ "$+=": 34,
89
+ "=;=": 35,
90
+ "$+$": 36,
91
+ "+$=": 37,
92
+ "=$;": 38,
93
+ ";=$": 39,
94
+ ";=;": 40,
95
+ "$+;": 41,
96
+ "=+$": 42,
97
+ "==;": 43,
98
+ "+$$": 44,
99
+ "+;=": 45,
100
+ "=$$": 46,
101
+ "$$=": 47,
102
+ ";=+": 48,
103
+ "$==": 49,
104
+ "=++": 50,
105
+ "++=": 51,
106
+ "$;=": 52,
107
+ "+$+": 53,
108
+ ";$;": 54,
109
+ ";++": 55,
110
+ "=$=": 56,
111
+ "=+;": 57,
112
+ "==+": 58,
113
+ "$$$": 59,
114
+ "$$;": 60,
115
+ "$$;;": 61,
116
+ "$=$;": 62,
117
+ "+;+": 63,
118
+ "+;+;": 64,
119
+ "$;+": 65,
120
+ ";;$": 66,
121
+ "===": 67,
122
+ "++$": 68,
123
+ ";+$": 69,
124
+ "\r\n": 70,
125
+ "+=+": 71,
126
+ "+$;": 72,
127
+ "+==": 73,
128
+ "+=$": 74,
129
+ ";$$": 75,
130
+ ";;=": 76,
131
+ ";$+": 77,
132
+ ";+;": 78,
133
+ ";;=;": 79,
134
+ "=$+": 80,
135
+ "=;;": 81,
136
+ "=;$": 82,
137
+ "=$+$": 83,
138
+ "==+$": 84,
139
+ "$$+": 85,
140
+ "$$$$": 86,
141
+ "$$$;": 87,
142
+ "$$=;": 88,
143
+ "$$;=+": 89,
144
+ ";==": 90,
145
+ ";==+": 91,
146
+ ";=++": 92,
147
+ ";=;$": 93,
148
+ ";==;": 94,
149
+ "$=+": 95,
150
+ "$=++": 96,
151
+ "$=;+": 97,
152
+ "$==++": 98,
153
+ "$++": 99
154
+ },
155
+ "merges": [
156
+ "$ $",
157
+ "; =",
158
+ "$ =",
159
+ "$ +",
160
+ "+ ;",
161
+ "= +",
162
+ "$ ;",
163
+ "; ;",
164
+ "= =",
165
+ "+ +",
166
+ "; $",
167
+ "; +",
168
+ "= ;",
169
+ "= $",
170
+ "+ $",
171
+ "+ =",
172
+ "; $=",
173
+ "$+ =",
174
+ "= ;=",
175
+ "$+ $",
176
+ "+ $=",
177
+ "= $;",
178
+ ";= $",
179
+ ";= ;",
180
+ "$+ ;",
181
+ "=+ $",
182
+ "== ;",
183
+ "+ $$",
184
+ "+ ;=",
185
+ "= $$",
186
+ "$$ =",
187
+ ";= +",
188
+ "$= =",
189
+ "=+ +",
190
+ "++ =",
191
+ "$ ;=",
192
+ "+ $+",
193
+ "; $;",
194
+ "; ++",
195
+ "= $=",
196
+ "= +;",
197
+ "= =+",
198
+ "$$ $",
199
+ "$$ ;",
200
+ "$$ ;;",
201
+ "$= $;",
202
+ "+; +",
203
+ "+; +;",
204
+ "$; +",
205
+ ";; $",
206
+ "== =",
207
+ "++ $",
208
+ ";+ $",
209
+ "\r \n",
210
+ "+ =+",
211
+ "+ $;",
212
+ "+ ==",
213
+ "+ =$",
214
+ "; $$",
215
+ "; ;=",
216
+ "; $+",
217
+ "; +;",
218
+ "; ;=;",
219
+ "= $+",
220
+ "= ;;",
221
+ "= ;$",
222
+ "= $+$",
223
+ "= =+$",
224
+ "$$ +",
225
+ "$$ $$",
226
+ "$$ $;",
227
+ "$$ =;",
228
+ "$$ ;=+",
229
+ ";= =",
230
+ ";= =+",
231
+ ";= ++",
232
+ ";= ;$",
233
+ ";= =;",
234
+ "$= +",
235
+ "$= ++",
236
+ "$= ;+",
237
+ "$= =++",
238
+ "$+ +"
239
+ ]
240
+ }
241
+ }
tokenizer_config.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "<|endoftext|>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ }
11
+ },
12
+ "bos_token": "<|endoftext|>",
13
+ "clean_up_tokenization_spaces": true,
14
+ "eos_token": "<|endoftext|>",
15
+ "model_max_length": 1000000000000000019884624838656,
16
+ "pad_token": "<|endoftext|>",
17
+ "tokenizer_class": "PreTrainedTokenizerFast"
18
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2cdced9a69f638b11b5680c60e66fc295d25f07b40be8a1e7ff1f04a81a64341
3
+ size 4536