MJ199999 committed
Commit fa17a3f · 1 Parent(s): 7fbde3b
Files changed (3)
  1. README.md +250 -0
  2. config.json +34 -0
  3. tf_model.h5 +3 -0
README.md ADDED
@@ -0,0 +1,250 @@
+ ---
+ tags:
+ - generated_from_keras_callback
+ model-index:
+ - name: gpt3_model
+   results: []
+ ---
+
+ <!-- This model card has been generated automatically according to the information Keras had access to. You should
+ probably proofread and complete it, then remove this comment. -->
+
+ # gpt3_model
+
+ This model is a fine-tuned version of `/gdrive/MyDrive/Colab Notebooks/sw-project/SW-Project/skt-ko-gpt-trinity-1.2B-v0.5-new-2/` (a local Colab checkpoint, apparently derived from SKT's ko-gpt-trinity-1.2B-v0.5) on an unknown dataset.
+ It achieves the following results at the end of training:
+ - Train Loss: 0.7352
+ - Train Lr: 9.999999e-05
+ - Epoch: 199
+
+ ## Model description
+
+ More information needed
+
+ ## Intended uses & limitations
+
+ More information needed
+
+ ## Training and evaluation data
+
+ More information needed
+
+ ## Training procedure
+
+ ### Training hyperparameters
+
+ The following hyperparameters were used during training:
+ - optimizer: {'name': 'Adagrad', 'learning_rate': 9.999999e-05, 'decay': 0.0, 'initial_accumulator_value': 0.1, 'epsilon': 1e-07}
+ - training_precision: float32
+
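Note that the serialized `learning_rate` of 9.999999e-05 (≈1e-4) is the value at the *end* of training; the results table below shows the run starting at 0.01. As a minimal sketch, this setup could be reconstructed in TensorFlow/Keras roughly as follows — the checkpoint path is illustrative only, and compiling without an explicit loss relies on the built-in language-modeling loss of `transformers` TF models:

```python
import tensorflow as tf
from transformers import TFGPT2LMHeadModel

# Illustrative local path; the card's Colab path is not a valid Hub id.
model = TFGPT2LMHeadModel.from_pretrained(
    "path/to/skt-ko-gpt-trinity-1.2B-v0.5-new-2"
)

# Adagrad exactly as recorded in the hyperparameters above.
optimizer = tf.keras.optimizers.Adagrad(
    learning_rate=9.999999e-05,
    decay=0.0,
    initial_accumulator_value=0.1,
    epsilon=1e-07,
)

# No explicit loss: transformers' TF models fall back to their internal
# causal language-modeling loss when compiled without one.
model.compile(optimizer=optimizer)
```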
+ ### Training results
+
+ | Train Loss | Train Lr | Epoch |
+ |:----------:|:------------:|:-----:|
+ | 6.4573 | 0.01 | 0 |
+ | 6.1839 | 0.01 | 1 |
+ | 6.1218 | 0.01 | 2 |
+ | 6.0346 | 0.01 | 3 |
+ | 5.9561 | 0.01 | 4 |
+ | 5.8583 | 0.01 | 5 |
+ | 5.7548 | 0.01 | 6 |
+ | 5.6404 | 0.01 | 7 |
+ | 5.4922 | 0.01 | 8 |
+ | 5.3803 | 0.01 | 9 |
+ | 5.2554 | 0.01 | 10 |
+ | 5.1228 | 0.01 | 11 |
+ | 5.0244 | 0.01 | 12 |
+ | 4.9005 | 0.01 | 13 |
+ | 4.7720 | 0.01 | 14 |
+ | 4.6211 | 0.01 | 15 |
+ | 4.5311 | 0.01 | 16 |
+ | 4.4448 | 0.01 | 17 |
+ | 4.3087 | 0.01 | 18 |
+ | 4.2361 | 0.01 | 19 |
+ | 4.1088 | 0.01 | 20 |
+ | 4.0153 | 0.01 | 21 |
+ | 3.9195 | 0.01 | 22 |
+ | 3.8411 | 0.01 | 23 |
+ | 3.7300 | 0.01 | 24 |
+ | 3.6430 | 0.01 | 25 |
+ | 3.5435 | 0.01 | 26 |
+ | 3.4905 | 0.01 | 27 |
+ | 3.4048 | 0.01 | 28 |
+ | 3.3365 | 0.01 | 29 |
+ | 3.2404 | 0.01 | 30 |
+ | 3.2009 | 0.01 | 31 |
+ | 3.1053 | 0.01 | 32 |
+ | 3.0403 | 0.01 | 33 |
+ | 2.9887 | 0.01 | 34 |
+ | 2.9240 | 0.01 | 35 |
+ | 2.8680 | 0.01 | 36 |
+ | 2.8000 | 0.01 | 37 |
+ | 2.7609 | 0.01 | 38 |
+ | 2.7069 | 0.01 | 39 |
+ | 2.6476 | 0.01 | 40 |
+ | 2.5793 | 0.01 | 41 |
+ | 2.5483 | 0.01 | 42 |
+ | 2.4905 | 0.01 | 43 |
+ | 2.4315 | 0.01 | 44 |
+ | 2.3970 | 0.01 | 45 |
+ | 2.3665 | 0.01 | 46 |
+ | 2.3116 | 0.01 | 47 |
+ | 2.2600 | 0.01 | 48 |
+ | 2.2116 | 0.01 | 49 |
+ | 2.1647 | 0.01 | 50 |
+ | 2.1323 | 0.01 | 51 |
+ | 2.0893 | 0.01 | 52 |
+ | 2.0366 | 0.01 | 53 |
+ | 2.0119 | 0.01 | 54 |
+ | 1.9681 | 0.01 | 55 |
+ | 1.9024 | 0.01 | 56 |
+ | 1.9008 | 0.01 | 57 |
+ | 1.8507 | 0.01 | 58 |
+ | 1.8216 | 0.01 | 59 |
+ | 1.7741 | 0.01 | 60 |
+ | 1.7425 | 0.01 | 61 |
+ | 1.6867 | 0.01 | 62 |
+ | 1.6754 | 0.01 | 63 |
+ | 1.6179 | 0.01 | 64 |
+ | 1.5847 | 0.01 | 65 |
+ | 1.5499 | 0.01 | 66 |
+ | 1.5183 | 0.01 | 67 |
+ | 1.4948 | 0.01 | 68 |
+ | 1.4515 | 0.01 | 69 |
+ | 1.4195 | 0.01 | 70 |
+ | 1.3916 | 0.01 | 71 |
+ | 1.3661 | 0.01 | 72 |
+ | 1.3419 | 0.01 | 73 |
+ | 1.3182 | 0.01 | 74 |
+ | 1.2940 | 0.01 | 75 |
+ | 1.2796 | 0.01 | 76 |
+ | 1.2415 | 0.01 | 77 |
+ | 1.2415 | 0.01 | 78 |
+ | 1.2154 | 0.01 | 79 |
+ | 1.1818 | 0.01 | 80 |
+ | 1.1709 | 0.01 | 81 |
+ | 1.1542 | 0.01 | 82 |
+ | 1.1441 | 0.01 | 83 |
+ | 1.1260 | 0.01 | 84 |
+ | 1.1057 | 0.01 | 85 |
+ | 1.0713 | 0.01 | 86 |
+ | 1.0709 | 0.01 | 87 |
+ | 1.0675 | 0.01 | 88 |
+ | 1.0444 | 0.01 | 89 |
+ | 1.0394 | 0.01 | 90 |
+ | 1.0258 | 0.01 | 91 |
+ | 1.0321 | 0.01 | 92 |
+ | 1.0138 | 0.01 | 93 |
+ | 0.9936 | 0.01 | 94 |
+ | 0.9952 | 0.01 | 95 |
+ | 0.9813 | 0.01 | 96 |
+ | 0.9743 | 0.01 | 97 |
+ | 0.9662 | 0.01 | 98 |
+ | 0.9604 | 0.01 | 99 |
+ | 0.8326 | 0.0009999999 | 100 |
+ | 0.8196 | 0.0009999999 | 101 |
+ | 0.8038 | 0.0009999999 | 102 |
+ | 0.8001 | 0.0009999999 | 103 |
+ | 0.7973 | 0.0009999999 | 104 |
+ | 0.7976 | 0.0009999999 | 105 |
+ | 0.7950 | 0.0009999999 | 106 |
+ | 0.7932 | 0.0009999999 | 107 |
+ | 0.7931 | 0.0009999999 | 108 |
+ | 0.7844 | 0.0009999999 | 109 |
+ | 0.7870 | 0.0009999999 | 110 |
+ | 0.7838 | 0.0009999999 | 111 |
+ | 0.7844 | 0.0009999999 | 112 |
+ | 0.7845 | 0.0009999999 | 113 |
+ | 0.7800 | 0.0009999999 | 114 |
+ | 0.7819 | 0.0009999999 | 115 |
+ | 0.7794 | 0.0009999999 | 116 |
+ | 0.7766 | 0.0009999999 | 117 |
+ | 0.7748 | 0.0009999999 | 118 |
+ | 0.7759 | 0.0009999999 | 119 |
+ | 0.7735 | 0.0009999999 | 120 |
+ | 0.7794 | 0.0009999999 | 121 |
+ | 0.7710 | 0.0009999999 | 122 |
+ | 0.7779 | 0.0009999999 | 123 |
+ | 0.7762 | 0.0009999999 | 124 |
+ | 0.7646 | 0.0009999999 | 125 |
+ | 0.7683 | 0.0009999999 | 126 |
+ | 0.7710 | 0.0009999999 | 127 |
+ | 0.7700 | 0.0009999999 | 128 |
+ | 0.7711 | 0.0009999999 | 129 |
+ | 0.7713 | 0.0009999999 | 130 |
+ | 0.7690 | 0.0009999999 | 131 |
+ | 0.7641 | 0.0009999999 | 132 |
+ | 0.7725 | 0.0009999999 | 133 |
+ | 0.7626 | 0.0009999999 | 134 |
+ | 0.7656 | 0.0009999999 | 135 |
+ | 0.7658 | 0.0009999999 | 136 |
+ | 0.7626 | 0.0009999999 | 137 |
+ | 0.7640 | 0.0009999999 | 138 |
+ | 0.7637 | 0.0009999999 | 139 |
+ | 0.7562 | 0.0009999999 | 140 |
+ | 0.7662 | 0.0009999999 | 141 |
+ | 0.7611 | 0.0009999999 | 142 |
+ | 0.7599 | 0.0009999999 | 143 |
+ | 0.7649 | 0.0009999999 | 144 |
+ | 0.7602 | 0.0009999999 | 145 |
+ | 0.7592 | 0.0009999999 | 146 |
+ | 0.7565 | 0.0009999999 | 147 |
+ | 0.7571 | 0.0009999999 | 148 |
+ | 0.7581 | 0.0009999999 | 149 |
+ | 0.7560 | 0.0009999999 | 150 |
+ | 0.7593 | 0.0009999999 | 151 |
+ | 0.7558 | 0.0009999999 | 152 |
+ | 0.7532 | 0.0009999999 | 153 |
+ | 0.7570 | 0.0009999999 | 154 |
+ | 0.7571 | 0.0009999999 | 155 |
+ | 0.7533 | 0.0009999999 | 156 |
+ | 0.7575 | 0.0009999999 | 157 |
+ | 0.7561 | 0.0009999999 | 158 |
+ | 0.7507 | 0.0009999999 | 159 |
+ | 0.7504 | 0.0009999999 | 160 |
+ | 0.7477 | 0.0009999999 | 161 |
+ | 0.7536 | 0.0009999999 | 162 |
+ | 0.7510 | 0.0009999999 | 163 |
+ | 0.7506 | 0.0009999999 | 164 |
+ | 0.7470 | 0.0009999999 | 165 |
+ | 0.7460 | 0.0009999999 | 166 |
+ | 0.7479 | 0.0009999999 | 167 |
+ | 0.7459 | 0.0009999999 | 168 |
+ | 0.7461 | 0.0009999999 | 169 |
+ | 0.7472 | 0.0009999999 | 170 |
+ | 0.7499 | 0.0009999999 | 171 |
+ | 0.7489 | 0.0009999999 | 172 |
+ | 0.7480 | 0.0009999999 | 173 |
+ | 0.7462 | 0.0009999999 | 174 |
+ | 0.7362 | 9.999999e-05 | 175 |
+ | 0.7333 | 9.999999e-05 | 176 |
+ | 0.7316 | 9.999999e-05 | 177 |
+ | 0.7349 | 9.999999e-05 | 178 |
+ | 0.7379 | 9.999999e-05 | 179 |
+ | 0.7323 | 9.999999e-05 | 180 |
+ | 0.7293 | 9.999999e-05 | 181 |
+ | 0.7368 | 9.999999e-05 | 182 |
+ | 0.7349 | 9.999999e-05 | 183 |
+ | 0.7355 | 9.999999e-05 | 184 |
+ | 0.7322 | 9.999999e-05 | 185 |
+ | 0.7333 | 9.999999e-05 | 186 |
+ | 0.7326 | 9.999999e-05 | 187 |
+ | 0.7334 | 9.999999e-05 | 188 |
+ | 0.7344 | 9.999999e-05 | 189 |
+ | 0.7329 | 9.999999e-05 | 190 |
+ | 0.7338 | 9.999999e-05 | 191 |
+ | 0.7342 | 9.999999e-05 | 192 |
+ | 0.7362 | 9.999999e-05 | 193 |
+ | 0.7339 | 9.999999e-05 | 194 |
+ | 0.7343 | 9.999999e-05 | 195 |
+ | 0.7302 | 9.999999e-05 | 196 |
+ | 0.7376 | 9.999999e-05 | 197 |
+ | 0.7380 | 9.999999e-05 | 198 |
+ | 0.7352 | 9.999999e-05 | 199 |
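The learning-rate column above implies a step schedule: a ×0.1 drop at epoch 100 (0.01 → ≈1e-3) and another at epoch 175 (≈1e-3 → ≈1e-4). The card does not record how the schedule was driven; a hypothetical Keras callback that would reproduce the observed steps:

```python
import tensorflow as tf

# Hypothetical reconstruction of the schedule implied by the table:
# 0.01 for epochs 0-99, 1e-3 for epochs 100-174, 1e-4 from epoch 175 on.
def step_schedule(epoch, lr):
    if epoch < 100:
        return 1e-2
    if epoch < 175:
        return 1e-3
    return 1e-4

lr_callback = tf.keras.callbacks.LearningRateScheduler(step_schedule, verbose=1)
# model.fit(train_dataset, epochs=200, callbacks=[lr_callback])
```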
+
+
+ ### Framework versions
+
+ - Transformers 4.21.3
+ - TensorFlow 2.8.2
+ - Tokenizers 0.12.1
config.json ADDED
@@ -0,0 +1,34 @@
+ {
+   "_name_or_path": "/gdrive/MyDrive/Colab Notebooks/sw-project/SW-Project/skt-ko-gpt-trinity-1.2B-v0.5-new-2/",
+   "activation_function": "gelu_new",
+   "architectures": [
+     "GPT2LMHeadModel"
+   ],
+   "attn_pdrop": 0.1,
+   "bos_token_id": 0,
+   "embd_pdrop": 0.1,
+   "eos_token_id": 8,
+   "gradient_checkpointing": false,
+   "initializer_range": 0.02,
+   "layer_norm_epsilon": 1e-05,
+   "model_type": "gpt2",
+   "n_ctx": 1024,
+   "n_embd": 1920,
+   "n_head": 16,
+   "n_inner": 7680,
+   "n_layer": 24,
+   "n_positions": 1024,
+   "pad_token_id": 8,
+   "reorder_and_upcast_attn": false,
+   "resid_pdrop": 0.1,
+   "scale_attn_by_inverse_layer_idx": false,
+   "scale_attn_weights": true,
+   "summary_activation": null,
+   "summary_first_dropout": 0.1,
+   "summary_proj_to_labels": true,
+   "summary_type": "cls_index",
+   "summary_use_proj": true,
+   "transformers_version": "4.21.3",
+   "use_cache": true,
+   "vocab_size": 51200
+ }
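Per this config, the checkpoint is a 24-layer GPT-2-style model: hidden size 1920, 16 attention heads, a 1024-token context window, and a 51,200-token vocabulary. A hedged usage sketch — the Hub repo id is inferred from the commit author and card title, and the tokenizer id is a guess based on `_name_or_path`, since this commit ships no tokenizer files:

```python
from transformers import AutoTokenizer, TFGPT2LMHeadModel

# Repo id inferred from the commit author and model name; unverified.
model = TFGPT2LMHeadModel.from_pretrained("MJ199999/gpt3_model")

# Assumed base-model tokenizer (no tokenizer ships with this commit).
tokenizer = AutoTokenizer.from_pretrained("skt/ko-gpt-trinity-1.2B-v0.5")

inputs = tokenizer("안녕하세요", return_tensors="tf")
# pad_token_id=8 matches the config above.
outputs = model.generate(**inputs, max_length=64, pad_token_id=8)
print(tokenizer.decode(outputs[0]))
```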
tf_model.h5 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8535cae8df9a846d7251714af84af0809b16b55710cafbb775d4828f77d82f41
+ size 4650580080
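This is a Git LFS pointer rather than the weights themselves: the actual `tf_model.h5` (≈4.65 GB) lives in LFS storage and is materialized on clone or pull. To fetch just this file programmatically, something like the following should work (repo id again inferred, not confirmed):

```python
from huggingface_hub import hf_hub_download

# Resolves the LFS pointer and downloads the actual ~4.65 GB weights file.
path = hf_hub_download(repo_id="MJ199999/gpt3_model", filename="tf_model.h5")
print(path)
```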