mtzig committed on
Commit
23a442e
·
verified ·
1 Parent(s): 1eff263

Model save

Browse files
Files changed (4) hide show
  1. README.md +158 -158
  2. config.json +1 -1
  3. model.safetensors +2 -2
  4. training_args.bin +1 -1
README.md CHANGED
@@ -16,7 +16,7 @@ should probably proofread and complete it, then remove this comment. -->
16
 
17
  This model is a fine-tuned version of [an unspecified base model](https://huggingface.co/) on an unknown dataset.
18
  It achieves the following results on the evaluation set:
19
- - Loss: 1.2150
20
  - Accuracy: 1.0
21
 
22
  ## Model description
@@ -49,163 +49,163 @@ The following hyperparameters were used during training:
49
 
50
  | Training Loss | Epoch | Step | Validation Loss | Accuracy |
51
  |:-------------:|:------:|:-----:|:---------------:|:--------:|
52
- | No log | 0 | 0 | 2.6662 | 0.0 |
53
- | 2.6583 | 0.0064 | 100 | 2.6645 | 0.0 |
54
- | 2.6518 | 0.0128 | 200 | 2.6593 | 0.0 |
55
- | 2.6469 | 0.0192 | 300 | 2.6504 | 0.0 |
56
- | 2.6427 | 0.0256 | 400 | 2.6378 | 0.0 |
57
- | 2.6264 | 0.032 | 500 | 2.6211 | 0.138 |
58
- | 2.6002 | 0.0384 | 600 | 2.6000 | 0.454 |
59
- | 2.5695 | 0.0448 | 700 | 2.5736 | 0.454 |
60
- | 2.542 | 0.0512 | 800 | 2.5403 | 0.454 |
61
- | 2.4981 | 0.0576 | 900 | 2.5025 | 0.454 |
62
- | 2.4635 | 0.064 | 1000 | 2.4680 | 0.566 |
63
- | 2.4431 | 0.0704 | 1100 | 2.4419 | 0.771 |
64
- | 2.4231 | 0.0768 | 1200 | 2.4202 | 0.771 |
65
- | 2.3986 | 0.0832 | 1300 | 2.4002 | 0.827 |
66
- | 2.3749 | 0.0896 | 1400 | 2.3808 | 0.881 |
67
- | 2.3616 | 0.096 | 1500 | 2.3614 | 0.881 |
68
- | 2.3422 | 0.1024 | 1600 | 2.3417 | 0.881 |
69
- | 2.3213 | 0.1088 | 1700 | 2.3224 | 0.881 |
70
- | 2.3017 | 0.1152 | 1800 | 2.3036 | 0.881 |
71
- | 2.2832 | 0.1216 | 1900 | 2.2851 | 0.881 |
72
- | 2.266 | 0.128 | 2000 | 2.2666 | 0.881 |
73
- | 2.2441 | 0.1344 | 2100 | 2.2485 | 0.942 |
74
- | 2.2325 | 0.1408 | 2200 | 2.2304 | 0.942 |
75
- | 2.2141 | 0.1472 | 2300 | 2.2124 | 0.942 |
76
- | 2.189 | 0.1536 | 2400 | 2.1947 | 0.942 |
77
- | 2.1776 | 0.16 | 2500 | 2.1771 | 0.942 |
78
- | 2.1636 | 0.1664 | 2600 | 2.1597 | 0.942 |
79
- | 2.142 | 0.1728 | 2700 | 2.1425 | 0.942 |
80
- | 2.1271 | 0.1792 | 2800 | 2.1253 | 0.942 |
81
- | 2.1049 | 0.1856 | 2900 | 2.1083 | 0.942 |
82
- | 2.0904 | 0.192 | 3000 | 2.0915 | 0.942 |
83
- | 2.0714 | 0.1984 | 3100 | 2.0746 | 0.942 |
84
- | 2.0615 | 0.2048 | 3200 | 2.0579 | 0.942 |
85
- | 2.0432 | 0.2112 | 3300 | 2.0413 | 0.942 |
86
- | 2.0225 | 0.2176 | 3400 | 2.0250 | 0.942 |
87
- | 2.006 | 0.224 | 3500 | 2.0086 | 0.942 |
88
- | 1.9944 | 0.2304 | 3600 | 1.9923 | 0.942 |
89
- | 1.9738 | 0.2368 | 3700 | 1.9763 | 1.0 |
90
- | 1.9602 | 0.2432 | 3800 | 1.9602 | 1.0 |
91
- | 1.9462 | 0.2496 | 3900 | 1.9444 | 1.0 |
92
- | 1.9272 | 0.256 | 4000 | 1.9285 | 1.0 |
93
- | 1.9133 | 0.2624 | 4100 | 1.9130 | 1.0 |
94
- | 1.8891 | 0.2688 | 4200 | 1.8976 | 1.0 |
95
- | 1.8819 | 0.2752 | 4300 | 1.8823 | 1.0 |
96
- | 1.8655 | 0.2816 | 4400 | 1.8672 | 1.0 |
97
- | 1.8522 | 0.288 | 4500 | 1.8522 | 1.0 |
98
- | 1.8363 | 0.2944 | 4600 | 1.8373 | 1.0 |
99
- | 1.8226 | 0.3008 | 4700 | 1.8225 | 1.0 |
100
- | 1.8082 | 0.3072 | 4800 | 1.8076 | 1.0 |
101
- | 1.7906 | 0.3136 | 4900 | 1.7930 | 1.0 |
102
- | 1.7746 | 0.32 | 5000 | 1.7785 | 1.0 |
103
- | 1.7625 | 0.3264 | 5100 | 1.7644 | 1.0 |
104
- | 1.7471 | 0.3328 | 5200 | 1.7504 | 1.0 |
105
- | 1.7309 | 0.3392 | 5300 | 1.7366 | 1.0 |
106
- | 1.7202 | 0.3456 | 5400 | 1.7231 | 1.0 |
107
- | 1.7081 | 0.352 | 5500 | 1.7098 | 1.0 |
108
- | 1.6976 | 0.3584 | 5600 | 1.6967 | 1.0 |
109
- | 1.6823 | 0.3648 | 5700 | 1.6838 | 1.0 |
110
- | 1.6714 | 0.3712 | 5800 | 1.6711 | 1.0 |
111
- | 1.6582 | 0.3776 | 5900 | 1.6585 | 1.0 |
112
- | 1.6415 | 0.384 | 6000 | 1.6462 | 1.0 |
113
- | 1.6337 | 0.3904 | 6100 | 1.6340 | 1.0 |
114
- | 1.6214 | 0.3968 | 6200 | 1.6221 | 1.0 |
115
- | 1.6125 | 0.4032 | 6300 | 1.6103 | 1.0 |
116
- | 1.5948 | 0.4096 | 6400 | 1.5987 | 1.0 |
117
- | 1.5867 | 0.416 | 6500 | 1.5874 | 1.0 |
118
- | 1.5719 | 0.4224 | 6600 | 1.5762 | 1.0 |
119
- | 1.5657 | 0.4288 | 6700 | 1.5653 | 1.0 |
120
- | 1.555 | 0.4352 | 6800 | 1.5545 | 1.0 |
121
- | 1.5437 | 0.4416 | 6900 | 1.5439 | 1.0 |
122
- | 1.534 | 0.448 | 7000 | 1.5336 | 1.0 |
123
- | 1.5239 | 0.4544 | 7100 | 1.5234 | 1.0 |
124
- | 1.5142 | 0.4608 | 7200 | 1.5133 | 1.0 |
125
- | 1.5025 | 0.4672 | 7300 | 1.5036 | 1.0 |
126
- | 1.4958 | 0.4736 | 7400 | 1.4940 | 1.0 |
127
- | 1.4872 | 0.48 | 7500 | 1.4845 | 1.0 |
128
- | 1.4758 | 0.4864 | 7600 | 1.4753 | 1.0 |
129
- | 1.4671 | 0.4928 | 7700 | 1.4662 | 1.0 |
130
- | 1.4572 | 0.4992 | 7800 | 1.4574 | 1.0 |
131
- | 1.4471 | 0.5056 | 7900 | 1.4487 | 1.0 |
132
- | 1.4425 | 0.512 | 8000 | 1.4403 | 1.0 |
133
- | 1.4335 | 0.5184 | 8100 | 1.4320 | 1.0 |
134
- | 1.4219 | 0.5248 | 8200 | 1.4239 | 1.0 |
135
- | 1.4154 | 0.5312 | 8300 | 1.4160 | 1.0 |
136
- | 1.406 | 0.5376 | 8400 | 1.4083 | 1.0 |
137
- | 1.4019 | 0.544 | 8500 | 1.4008 | 1.0 |
138
- | 1.3911 | 0.5504 | 8600 | 1.3934 | 1.0 |
139
- | 1.3833 | 0.5568 | 8700 | 1.3863 | 1.0 |
140
- | 1.377 | 0.5632 | 8800 | 1.3793 | 1.0 |
141
- | 1.3716 | 0.5696 | 8900 | 1.3726 | 1.0 |
142
- | 1.3636 | 0.576 | 9000 | 1.3659 | 1.0 |
143
- | 1.3582 | 0.5824 | 9100 | 1.3594 | 1.0 |
144
- | 1.3488 | 0.5888 | 9200 | 1.3532 | 1.0 |
145
- | 1.3471 | 0.5952 | 9300 | 1.3471 | 1.0 |
146
- | 1.3409 | 0.6016 | 9400 | 1.3412 | 1.0 |
147
- | 1.3362 | 0.608 | 9500 | 1.3354 | 1.0 |
148
- | 1.3282 | 0.6144 | 9600 | 1.3299 | 1.0 |
149
- | 1.3258 | 0.6208 | 9700 | 1.3245 | 1.0 |
150
- | 1.3139 | 0.6272 | 9800 | 1.3192 | 1.0 |
151
- | 1.3142 | 0.6336 | 9900 | 1.3141 | 1.0 |
152
- | 1.3052 | 0.64 | 10000 | 1.3092 | 1.0 |
153
- | 1.3038 | 0.6464 | 10100 | 1.3044 | 1.0 |
154
- | 1.3015 | 0.6528 | 10200 | 1.2999 | 1.0 |
155
- | 1.2957 | 0.6592 | 10300 | 1.2954 | 1.0 |
156
- | 1.2881 | 0.6656 | 10400 | 1.2911 | 1.0 |
157
- | 1.2873 | 0.672 | 10500 | 1.2869 | 1.0 |
158
- | 1.2816 | 0.6784 | 10600 | 1.2830 | 1.0 |
159
- | 1.2794 | 0.6848 | 10700 | 1.2791 | 1.0 |
160
- | 1.276 | 0.6912 | 10800 | 1.2754 | 1.0 |
161
- | 1.2648 | 0.6976 | 10900 | 1.2718 | 1.0 |
162
- | 1.2693 | 0.704 | 11000 | 1.2683 | 1.0 |
163
- | 1.2601 | 0.7104 | 11100 | 1.2650 | 1.0 |
164
- | 1.2615 | 0.7168 | 11200 | 1.2619 | 1.0 |
165
- | 1.2588 | 0.7232 | 11300 | 1.2588 | 1.0 |
166
- | 1.2572 | 0.7296 | 11400 | 1.2559 | 1.0 |
167
- | 1.2521 | 0.736 | 11500 | 1.2531 | 1.0 |
168
- | 1.2511 | 0.7424 | 11600 | 1.2504 | 1.0 |
169
- | 1.2483 | 0.7488 | 11700 | 1.2479 | 1.0 |
170
- | 1.2449 | 0.7552 | 11800 | 1.2455 | 1.0 |
171
- | 1.241 | 0.7616 | 11900 | 1.2432 | 1.0 |
172
- | 1.2378 | 0.768 | 12000 | 1.2410 | 1.0 |
173
- | 1.2429 | 0.7744 | 12100 | 1.2390 | 1.0 |
174
- | 1.2367 | 0.7808 | 12200 | 1.2370 | 1.0 |
175
- | 1.2332 | 0.7872 | 12300 | 1.2352 | 1.0 |
176
- | 1.2325 | 0.7936 | 12400 | 1.2334 | 1.0 |
177
- | 1.2325 | 0.8 | 12500 | 1.2318 | 1.0 |
178
- | 1.2309 | 0.8064 | 12600 | 1.2302 | 1.0 |
179
- | 1.2295 | 0.8128 | 12700 | 1.2288 | 1.0 |
180
- | 1.2236 | 0.8192 | 12800 | 1.2274 | 1.0 |
181
- | 1.2246 | 0.8256 | 12900 | 1.2262 | 1.0 |
182
- | 1.2241 | 0.832 | 13000 | 1.2250 | 1.0 |
183
- | 1.223 | 0.8384 | 13100 | 1.2239 | 1.0 |
184
- | 1.2236 | 0.8448 | 13200 | 1.2229 | 1.0 |
185
- | 1.2214 | 0.8512 | 13300 | 1.2220 | 1.0 |
186
- | 1.2211 | 0.8576 | 13400 | 1.2211 | 1.0 |
187
- | 1.2188 | 0.864 | 13500 | 1.2203 | 1.0 |
188
- | 1.2155 | 0.8704 | 13600 | 1.2196 | 1.0 |
189
- | 1.219 | 0.8768 | 13700 | 1.2190 | 1.0 |
190
- | 1.2158 | 0.8832 | 13800 | 1.2184 | 1.0 |
191
- | 1.216 | 0.8896 | 13900 | 1.2178 | 1.0 |
192
- | 1.2145 | 0.896 | 14000 | 1.2174 | 1.0 |
193
- | 1.2154 | 0.9024 | 14100 | 1.2170 | 1.0 |
194
- | 1.2144 | 0.9088 | 14200 | 1.2166 | 1.0 |
195
- | 1.2144 | 0.9152 | 14300 | 1.2163 | 1.0 |
196
- | 1.2176 | 0.9216 | 14400 | 1.2160 | 1.0 |
197
- | 1.2148 | 0.928 | 14500 | 1.2158 | 1.0 |
198
- | 1.2147 | 0.9344 | 14600 | 1.2156 | 1.0 |
199
- | 1.2139 | 0.9408 | 14700 | 1.2154 | 1.0 |
200
- | 1.2166 | 0.9472 | 14800 | 1.2153 | 1.0 |
201
- | 1.2168 | 0.9536 | 14900 | 1.2152 | 1.0 |
202
- | 1.2177 | 0.96 | 15000 | 1.2151 | 1.0 |
203
- | 1.2109 | 0.9664 | 15100 | 1.2151 | 1.0 |
204
- | 1.2147 | 0.9728 | 15200 | 1.2150 | 1.0 |
205
- | 1.2176 | 0.9792 | 15300 | 1.2150 | 1.0 |
206
- | 1.2137 | 0.9856 | 15400 | 1.2150 | 1.0 |
207
- | 1.2176 | 0.992 | 15500 | 1.2150 | 1.0 |
208
- | 1.2163 | 0.9984 | 15600 | 1.2150 | 1.0 |
209
 
210
 
211
  ### Framework versions
 
16
 
17
  This model is a fine-tuned version of [an unspecified base model](https://huggingface.co/) on an unknown dataset.
18
  It achieves the following results on the evaluation set:
19
+ - Loss: 1.4003
20
  - Accuracy: 1.0
21
 
22
  ## Model description
 
49
 
50
  | Training Loss | Epoch | Step | Validation Loss | Accuracy |
51
  |:-------------:|:------:|:-----:|:---------------:|:--------:|
52
+ | No log | 0 | 0 | 2.6616 | 0.0 |
53
+ | 2.6599 | 0.0064 | 100 | 2.6600 | 0.0 |
54
+ | 2.6551 | 0.0128 | 200 | 2.6551 | 0.0 |
55
+ | 2.6475 | 0.0192 | 300 | 2.6468 | 0.0 |
56
+ | 2.6375 | 0.0256 | 400 | 2.6349 | 0.0 |
57
+ | 2.6215 | 0.032 | 500 | 2.6191 | 0.0 |
58
+ | 2.6007 | 0.0384 | 600 | 2.5995 | 0.0 |
59
+ | 2.577 | 0.0448 | 700 | 2.5768 | 0.0 |
60
+ | 2.5529 | 0.0512 | 800 | 2.5516 | 0.454 |
61
+ | 2.5229 | 0.0576 | 900 | 2.5233 | 0.454 |
62
+ | 2.4946 | 0.064 | 1000 | 2.4954 | 0.497 |
63
+ | 2.4704 | 0.0704 | 1100 | 2.4695 | 0.661 |
64
+ | 2.4481 | 0.0768 | 1200 | 2.4460 | 0.714 |
65
+ | 2.4244 | 0.0832 | 1300 | 2.4244 | 0.771 |
66
+ | 2.4013 | 0.0896 | 1400 | 2.4039 | 0.881 |
67
+ | 2.3849 | 0.096 | 1500 | 2.3843 | 0.881 |
68
+ | 2.3656 | 0.1024 | 1600 | 2.3654 | 0.942 |
69
+ | 2.3473 | 0.1088 | 1700 | 2.3474 | 0.942 |
70
+ | 2.3294 | 0.1152 | 1800 | 2.3303 | 0.942 |
71
+ | 2.3125 | 0.1216 | 1900 | 2.3139 | 1.0 |
72
+ | 2.2971 | 0.128 | 2000 | 2.2979 | 1.0 |
73
+ | 2.28 | 0.1344 | 2100 | 2.2822 | 1.0 |
74
+ | 2.2665 | 0.1408 | 2200 | 2.2668 | 1.0 |
75
+ | 2.2508 | 0.1472 | 2300 | 2.2516 | 1.0 |
76
+ | 2.2343 | 0.1536 | 2400 | 2.2366 | 1.0 |
77
+ | 2.2217 | 0.16 | 2500 | 2.2217 | 1.0 |
78
+ | 2.2073 | 0.1664 | 2600 | 2.2070 | 1.0 |
79
+ | 2.1915 | 0.1728 | 2700 | 2.1925 | 1.0 |
80
+ | 2.1777 | 0.1792 | 2800 | 2.1781 | 1.0 |
81
+ | 2.1619 | 0.1856 | 2900 | 2.1638 | 0.939 |
82
+ | 2.1491 | 0.192 | 3000 | 2.1497 | 0.939 |
83
+ | 2.1344 | 0.1984 | 3100 | 2.1356 | 0.939 |
84
+ | 2.121 | 0.2048 | 3200 | 2.1216 | 0.939 |
85
+ | 2.1061 | 0.2112 | 3300 | 2.1077 | 0.939 |
86
+ | 2.0934 | 0.2176 | 3400 | 2.0940 | 0.939 |
87
+ | 2.0796 | 0.224 | 3500 | 2.0803 | 0.939 |
88
+ | 2.0647 | 0.2304 | 3600 | 2.0668 | 0.939 |
89
+ | 2.0533 | 0.2368 | 3700 | 2.0534 | 0.939 |
90
+ | 2.0388 | 0.2432 | 3800 | 2.0401 | 0.939 |
91
+ | 2.0257 | 0.2496 | 3900 | 2.0269 | 0.939 |
92
+ | 2.0131 | 0.256 | 4000 | 2.0137 | 0.939 |
93
+ | 1.9997 | 0.2624 | 4100 | 2.0008 | 0.939 |
94
+ | 1.9897 | 0.2688 | 4200 | 1.9879 | 0.939 |
95
+ | 1.9741 | 0.2752 | 4300 | 1.9752 | 0.939 |
96
+ | 1.9622 | 0.2816 | 4400 | 1.9626 | 0.939 |
97
+ | 1.9477 | 0.288 | 4500 | 1.9500 | 0.939 |
98
+ | 1.9374 | 0.2944 | 4600 | 1.9376 | 0.939 |
99
+ | 1.9238 | 0.3008 | 4700 | 1.9254 | 0.939 |
100
+ | 1.9102 | 0.3072 | 4800 | 1.9131 | 0.939 |
101
+ | 1.9014 | 0.3136 | 4900 | 1.9011 | 0.939 |
102
+ | 1.8905 | 0.32 | 5000 | 1.8892 | 0.939 |
103
+ | 1.8766 | 0.3264 | 5100 | 1.8774 | 0.939 |
104
+ | 1.8675 | 0.3328 | 5200 | 1.8658 | 0.939 |
105
+ | 1.8569 | 0.3392 | 5300 | 1.8543 | 0.939 |
106
+ | 1.8427 | 0.3456 | 5400 | 1.8428 | 0.939 |
107
+ | 1.8304 | 0.352 | 5500 | 1.8314 | 0.939 |
108
+ | 1.8182 | 0.3584 | 5600 | 1.8201 | 0.939 |
109
+ | 1.8086 | 0.3648 | 5700 | 1.8090 | 0.939 |
110
+ | 1.7961 | 0.3712 | 5800 | 1.7981 | 0.939 |
111
+ | 1.7859 | 0.3776 | 5900 | 1.7872 | 0.939 |
112
+ | 1.7797 | 0.384 | 6000 | 1.7767 | 0.939 |
113
+ | 1.7648 | 0.3904 | 6100 | 1.7662 | 0.939 |
114
+ | 1.7551 | 0.3968 | 6200 | 1.7560 | 0.939 |
115
+ | 1.7416 | 0.4032 | 6300 | 1.7458 | 1.0 |
116
+ | 1.7383 | 0.4096 | 6400 | 1.7359 | 1.0 |
117
+ | 1.7252 | 0.416 | 6500 | 1.7261 | 1.0 |
118
+ | 1.7194 | 0.4224 | 6600 | 1.7165 | 1.0 |
119
+ | 1.7048 | 0.4288 | 6700 | 1.7071 | 0.939 |
120
+ | 1.6956 | 0.4352 | 6800 | 1.6977 | 1.0 |
121
+ | 1.6874 | 0.4416 | 6900 | 1.6887 | 1.0 |
122
+ | 1.6773 | 0.448 | 7000 | 1.6797 | 1.0 |
123
+ | 1.6688 | 0.4544 | 7100 | 1.6709 | 1.0 |
124
+ | 1.6595 | 0.4608 | 7200 | 1.6622 | 1.0 |
125
+ | 1.6531 | 0.4672 | 7300 | 1.6538 | 1.0 |
126
+ | 1.6412 | 0.4736 | 7400 | 1.6454 | 1.0 |
127
+ | 1.6323 | 0.48 | 7500 | 1.6372 | 1.0 |
128
+ | 1.6269 | 0.4864 | 7600 | 1.6292 | 1.0 |
129
+ | 1.6186 | 0.4928 | 7700 | 1.6213 | 1.0 |
130
+ | 1.6125 | 0.4992 | 7800 | 1.6136 | 1.0 |
131
+ | 1.6068 | 0.5056 | 7900 | 1.6061 | 1.0 |
132
+ | 1.5938 | 0.512 | 8000 | 1.5987 | 1.0 |
133
+ | 1.5877 | 0.5184 | 8100 | 1.5915 | 1.0 |
134
+ | 1.5857 | 0.5248 | 8200 | 1.5844 | 1.0 |
135
+ | 1.5768 | 0.5312 | 8300 | 1.5775 | 1.0 |
136
+ | 1.5727 | 0.5376 | 8400 | 1.5708 | 1.0 |
137
+ | 1.5609 | 0.544 | 8500 | 1.5642 | 1.0 |
138
+ | 1.5597 | 0.5504 | 8600 | 1.5577 | 1.0 |
139
+ | 1.5546 | 0.5568 | 8700 | 1.5515 | 1.0 |
140
+ | 1.5474 | 0.5632 | 8800 | 1.5454 | 1.0 |
141
+ | 1.5393 | 0.5696 | 8900 | 1.5394 | 1.0 |
142
+ | 1.5357 | 0.576 | 9000 | 1.5336 | 1.0 |
143
+ | 1.5285 | 0.5824 | 9100 | 1.5279 | 1.0 |
144
+ | 1.5279 | 0.5888 | 9200 | 1.5224 | 1.0 |
145
+ | 1.5152 | 0.5952 | 9300 | 1.5171 | 1.0 |
146
+ | 1.5106 | 0.6016 | 9400 | 1.5118 | 1.0 |
147
+ | 1.5038 | 0.608 | 9500 | 1.5068 | 1.0 |
148
+ | 1.5031 | 0.6144 | 9600 | 1.5019 | 1.0 |
149
+ | 1.4928 | 0.6208 | 9700 | 1.4971 | 1.0 |
150
+ | 1.5003 | 0.6272 | 9800 | 1.4925 | 1.0 |
151
+ | 1.4862 | 0.6336 | 9900 | 1.4880 | 1.0 |
152
+ | 1.4891 | 0.64 | 10000 | 1.4837 | 1.0 |
153
+ | 1.4788 | 0.6464 | 10100 | 1.4795 | 1.0 |
154
+ | 1.4705 | 0.6528 | 10200 | 1.4754 | 1.0 |
155
+ | 1.469 | 0.6592 | 10300 | 1.4715 | 1.0 |
156
+ | 1.4715 | 0.6656 | 10400 | 1.4677 | 1.0 |
157
+ | 1.4615 | 0.672 | 10500 | 1.4640 | 1.0 |
158
+ | 1.4611 | 0.6784 | 10600 | 1.4605 | 1.0 |
159
+ | 1.4545 | 0.6848 | 10700 | 1.4570 | 1.0 |
160
+ | 1.4506 | 0.6912 | 10800 | 1.4537 | 1.0 |
161
+ | 1.4621 | 0.6976 | 10900 | 1.4506 | 1.0 |
162
+ | 1.4437 | 0.704 | 11000 | 1.4475 | 1.0 |
163
+ | 1.4524 | 0.7104 | 11100 | 1.4446 | 1.0 |
164
+ | 1.4406 | 0.7168 | 11200 | 1.4418 | 1.0 |
165
+ | 1.4373 | 0.7232 | 11300 | 1.4391 | 1.0 |
166
+ | 1.432 | 0.7296 | 11400 | 1.4365 | 1.0 |
167
+ | 1.4341 | 0.736 | 11500 | 1.4341 | 1.0 |
168
+ | 1.4285 | 0.7424 | 11600 | 1.4317 | 1.0 |
169
+ | 1.4269 | 0.7488 | 11700 | 1.4295 | 1.0 |
170
+ | 1.4269 | 0.7552 | 11800 | 1.4273 | 1.0 |
171
+ | 1.4282 | 0.7616 | 11900 | 1.4253 | 1.0 |
172
+ | 1.4282 | 0.768 | 12000 | 1.4234 | 1.0 |
173
+ | 1.4117 | 0.7744 | 12100 | 1.4216 | 1.0 |
174
+ | 1.4186 | 0.7808 | 12200 | 1.4198 | 1.0 |
175
+ | 1.4203 | 0.7872 | 12300 | 1.4182 | 1.0 |
176
+ | 1.4168 | 0.7936 | 12400 | 1.4166 | 1.0 |
177
+ | 1.412 | 0.8 | 12500 | 1.4152 | 1.0 |
178
+ | 1.4106 | 0.8064 | 12600 | 1.4138 | 1.0 |
179
+ | 1.4093 | 0.8128 | 12700 | 1.4125 | 1.0 |
180
+ | 1.4176 | 0.8192 | 12800 | 1.4113 | 1.0 |
181
+ | 1.4117 | 0.8256 | 12900 | 1.4102 | 1.0 |
182
+ | 1.4093 | 0.832 | 13000 | 1.4092 | 1.0 |
183
+ | 1.4083 | 0.8384 | 13100 | 1.4082 | 1.0 |
184
+ | 1.404 | 0.8448 | 13200 | 1.4073 | 1.0 |
185
+ | 1.4059 | 0.8512 | 13300 | 1.4065 | 1.0 |
186
+ | 1.4038 | 0.8576 | 13400 | 1.4057 | 1.0 |
187
+ | 1.4065 | 0.864 | 13500 | 1.4050 | 1.0 |
188
+ | 1.4114 | 0.8704 | 13600 | 1.4044 | 1.0 |
189
+ | 1.4019 | 0.8768 | 13700 | 1.4038 | 1.0 |
190
+ | 1.4069 | 0.8832 | 13800 | 1.4033 | 1.0 |
191
+ | 1.405 | 0.8896 | 13900 | 1.4028 | 1.0 |
192
+ | 1.4067 | 0.896 | 14000 | 1.4024 | 1.0 |
193
+ | 1.4035 | 0.9024 | 14100 | 1.4020 | 1.0 |
194
+ | 1.4046 | 0.9088 | 14200 | 1.4017 | 1.0 |
195
+ | 1.4036 | 0.9152 | 14300 | 1.4014 | 1.0 |
196
+ | 1.3958 | 0.9216 | 14400 | 1.4012 | 1.0 |
197
+ | 1.4011 | 0.928 | 14500 | 1.4010 | 1.0 |
198
+ | 1.401 | 0.9344 | 14600 | 1.4008 | 1.0 |
199
+ | 1.4022 | 0.9408 | 14700 | 1.4007 | 1.0 |
200
+ | 1.3959 | 0.9472 | 14800 | 1.4005 | 1.0 |
201
+ | 1.3951 | 0.9536 | 14900 | 1.4005 | 1.0 |
202
+ | 1.393 | 0.96 | 15000 | 1.4004 | 1.0 |
203
+ | 1.4074 | 0.9664 | 15100 | 1.4003 | 1.0 |
204
+ | 1.3991 | 0.9728 | 15200 | 1.4003 | 1.0 |
205
+ | 1.3929 | 0.9792 | 15300 | 1.4003 | 1.0 |
206
+ | 1.4011 | 0.9856 | 15400 | 1.4003 | 1.0 |
207
+ | 1.3928 | 0.992 | 15500 | 1.4003 | 1.0 |
208
+ | 1.3956 | 0.9984 | 15600 | 1.4003 | 1.0 |
209
 
210
 
211
  ### Framework versions
config.json CHANGED
@@ -7,7 +7,7 @@
7
  "dropout": 0.0,
8
  "mlp_dim": 4,
9
  "model_type": "nanogpt",
10
- "n_embd": 10,
11
  "n_head": 1,
12
  "n_layer": 1,
13
  "nonlinearity": "RELU",
 
7
  "dropout": 0.0,
8
  "mlp_dim": 4,
9
  "model_type": "nanogpt",
10
+ "n_embd": 8,
11
  "n_head": 1,
12
  "n_layer": 1,
13
  "nonlinearity": "RELU",
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9cbf90badb68fa35a40c20748478d6d26d1d82ef1ef4e68e50c22eeb293cf7ae
3
- size 7960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:07c052f4b392e4f3dd47eeca68ba7ea6f3e27831d0e5d44aac23da9d2fbea621
3
+ size 5872
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:93949ea201b1389d8b46906310769b72a0853861ce5cf0276d3a8b7f4d9f074d
3
  size 5240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:18639df385ca1d08af21c0a140152f17a00f2da79a38258c2f965a39f736653f
3
  size 5240