mtzig committed on
Commit
37f8cbf
·
verified ·
1 Parent(s): 23a442e

Model save

Browse files
Files changed (4) hide show
  1. README.md +159 -159
  2. config.json +2 -2
  3. model.safetensors +2 -2
  4. training_args.bin +1 -1
README.md CHANGED
@@ -16,8 +16,8 @@ should probably proofread and complete it, then remove this comment. -->
16
 
17
  This model is a fine-tuned version of [](https://huggingface.co/) on an unknown dataset.
18
  It achieves the following results on the evaluation set:
19
- - Loss: 1.4003
20
- - Accuracy: 1.0
21
 
22
  ## Model description
23
 
@@ -49,163 +49,163 @@ The following hyperparameters were used during training:
49
 
50
  | Training Loss | Epoch | Step | Validation Loss | Accuracy |
51
  |:-------------:|:------:|:-----:|:---------------:|:--------:|
52
- | No log | 0 | 0 | 2.6616 | 0.0 |
53
- | 2.6599 | 0.0064 | 100 | 2.6600 | 0.0 |
54
- | 2.6551 | 0.0128 | 200 | 2.6551 | 0.0 |
55
- | 2.6475 | 0.0192 | 300 | 2.6468 | 0.0 |
56
- | 2.6375 | 0.0256 | 400 | 2.6349 | 0.0 |
57
- | 2.6215 | 0.032 | 500 | 2.6191 | 0.0 |
58
- | 2.6007 | 0.0384 | 600 | 2.5995 | 0.0 |
59
- | 2.577 | 0.0448 | 700 | 2.5768 | 0.0 |
60
- | 2.5529 | 0.0512 | 800 | 2.5516 | 0.454 |
61
- | 2.5229 | 0.0576 | 900 | 2.5233 | 0.454 |
62
- | 2.4946 | 0.064 | 1000 | 2.4954 | 0.497 |
63
- | 2.4704 | 0.0704 | 1100 | 2.4695 | 0.661 |
64
- | 2.4481 | 0.0768 | 1200 | 2.4460 | 0.714 |
65
- | 2.4244 | 0.0832 | 1300 | 2.4244 | 0.771 |
66
- | 2.4013 | 0.0896 | 1400 | 2.4039 | 0.881 |
67
- | 2.3849 | 0.096 | 1500 | 2.3843 | 0.881 |
68
- | 2.3656 | 0.1024 | 1600 | 2.3654 | 0.942 |
69
- | 2.3473 | 0.1088 | 1700 | 2.3474 | 0.942 |
70
- | 2.3294 | 0.1152 | 1800 | 2.3303 | 0.942 |
71
- | 2.3125 | 0.1216 | 1900 | 2.3139 | 1.0 |
72
- | 2.2971 | 0.128 | 2000 | 2.2979 | 1.0 |
73
- | 2.28 | 0.1344 | 2100 | 2.2822 | 1.0 |
74
- | 2.2665 | 0.1408 | 2200 | 2.2668 | 1.0 |
75
- | 2.2508 | 0.1472 | 2300 | 2.2516 | 1.0 |
76
- | 2.2343 | 0.1536 | 2400 | 2.2366 | 1.0 |
77
- | 2.2217 | 0.16 | 2500 | 2.2217 | 1.0 |
78
- | 2.2073 | 0.1664 | 2600 | 2.2070 | 1.0 |
79
- | 2.1915 | 0.1728 | 2700 | 2.1925 | 1.0 |
80
- | 2.1777 | 0.1792 | 2800 | 2.1781 | 1.0 |
81
- | 2.1619 | 0.1856 | 2900 | 2.1638 | 0.939 |
82
- | 2.1491 | 0.192 | 3000 | 2.1497 | 0.939 |
83
- | 2.1344 | 0.1984 | 3100 | 2.1356 | 0.939 |
84
- | 2.121 | 0.2048 | 3200 | 2.1216 | 0.939 |
85
- | 2.1061 | 0.2112 | 3300 | 2.1077 | 0.939 |
86
- | 2.0934 | 0.2176 | 3400 | 2.0940 | 0.939 |
87
- | 2.0796 | 0.224 | 3500 | 2.0803 | 0.939 |
88
- | 2.0647 | 0.2304 | 3600 | 2.0668 | 0.939 |
89
- | 2.0533 | 0.2368 | 3700 | 2.0534 | 0.939 |
90
- | 2.0388 | 0.2432 | 3800 | 2.0401 | 0.939 |
91
- | 2.0257 | 0.2496 | 3900 | 2.0269 | 0.939 |
92
- | 2.0131 | 0.256 | 4000 | 2.0137 | 0.939 |
93
- | 1.9997 | 0.2624 | 4100 | 2.0008 | 0.939 |
94
- | 1.9897 | 0.2688 | 4200 | 1.9879 | 0.939 |
95
- | 1.9741 | 0.2752 | 4300 | 1.9752 | 0.939 |
96
- | 1.9622 | 0.2816 | 4400 | 1.9626 | 0.939 |
97
- | 1.9477 | 0.288 | 4500 | 1.9500 | 0.939 |
98
- | 1.9374 | 0.2944 | 4600 | 1.9376 | 0.939 |
99
- | 1.9238 | 0.3008 | 4700 | 1.9254 | 0.939 |
100
- | 1.9102 | 0.3072 | 4800 | 1.9131 | 0.939 |
101
- | 1.9014 | 0.3136 | 4900 | 1.9011 | 0.939 |
102
- | 1.8905 | 0.32 | 5000 | 1.8892 | 0.939 |
103
- | 1.8766 | 0.3264 | 5100 | 1.8774 | 0.939 |
104
- | 1.8675 | 0.3328 | 5200 | 1.8658 | 0.939 |
105
- | 1.8569 | 0.3392 | 5300 | 1.8543 | 0.939 |
106
- | 1.8427 | 0.3456 | 5400 | 1.8428 | 0.939 |
107
- | 1.8304 | 0.352 | 5500 | 1.8314 | 0.939 |
108
- | 1.8182 | 0.3584 | 5600 | 1.8201 | 0.939 |
109
- | 1.8086 | 0.3648 | 5700 | 1.8090 | 0.939 |
110
- | 1.7961 | 0.3712 | 5800 | 1.7981 | 0.939 |
111
- | 1.7859 | 0.3776 | 5900 | 1.7872 | 0.939 |
112
- | 1.7797 | 0.384 | 6000 | 1.7767 | 0.939 |
113
- | 1.7648 | 0.3904 | 6100 | 1.7662 | 0.939 |
114
- | 1.7551 | 0.3968 | 6200 | 1.7560 | 0.939 |
115
- | 1.7416 | 0.4032 | 6300 | 1.7458 | 1.0 |
116
- | 1.7383 | 0.4096 | 6400 | 1.7359 | 1.0 |
117
- | 1.7252 | 0.416 | 6500 | 1.7261 | 1.0 |
118
- | 1.7194 | 0.4224 | 6600 | 1.7165 | 1.0 |
119
- | 1.7048 | 0.4288 | 6700 | 1.7071 | 0.939 |
120
- | 1.6956 | 0.4352 | 6800 | 1.6977 | 1.0 |
121
- | 1.6874 | 0.4416 | 6900 | 1.6887 | 1.0 |
122
- | 1.6773 | 0.448 | 7000 | 1.6797 | 1.0 |
123
- | 1.6688 | 0.4544 | 7100 | 1.6709 | 1.0 |
124
- | 1.6595 | 0.4608 | 7200 | 1.6622 | 1.0 |
125
- | 1.6531 | 0.4672 | 7300 | 1.6538 | 1.0 |
126
- | 1.6412 | 0.4736 | 7400 | 1.6454 | 1.0 |
127
- | 1.6323 | 0.48 | 7500 | 1.6372 | 1.0 |
128
- | 1.6269 | 0.4864 | 7600 | 1.6292 | 1.0 |
129
- | 1.6186 | 0.4928 | 7700 | 1.6213 | 1.0 |
130
- | 1.6125 | 0.4992 | 7800 | 1.6136 | 1.0 |
131
- | 1.6068 | 0.5056 | 7900 | 1.6061 | 1.0 |
132
- | 1.5938 | 0.512 | 8000 | 1.5987 | 1.0 |
133
- | 1.5877 | 0.5184 | 8100 | 1.5915 | 1.0 |
134
- | 1.5857 | 0.5248 | 8200 | 1.5844 | 1.0 |
135
- | 1.5768 | 0.5312 | 8300 | 1.5775 | 1.0 |
136
- | 1.5727 | 0.5376 | 8400 | 1.5708 | 1.0 |
137
- | 1.5609 | 0.544 | 8500 | 1.5642 | 1.0 |
138
- | 1.5597 | 0.5504 | 8600 | 1.5577 | 1.0 |
139
- | 1.5546 | 0.5568 | 8700 | 1.5515 | 1.0 |
140
- | 1.5474 | 0.5632 | 8800 | 1.5454 | 1.0 |
141
- | 1.5393 | 0.5696 | 8900 | 1.5394 | 1.0 |
142
- | 1.5357 | 0.576 | 9000 | 1.5336 | 1.0 |
143
- | 1.5285 | 0.5824 | 9100 | 1.5279 | 1.0 |
144
- | 1.5279 | 0.5888 | 9200 | 1.5224 | 1.0 |
145
- | 1.5152 | 0.5952 | 9300 | 1.5171 | 1.0 |
146
- | 1.5106 | 0.6016 | 9400 | 1.5118 | 1.0 |
147
- | 1.5038 | 0.608 | 9500 | 1.5068 | 1.0 |
148
- | 1.5031 | 0.6144 | 9600 | 1.5019 | 1.0 |
149
- | 1.4928 | 0.6208 | 9700 | 1.4971 | 1.0 |
150
- | 1.5003 | 0.6272 | 9800 | 1.4925 | 1.0 |
151
- | 1.4862 | 0.6336 | 9900 | 1.4880 | 1.0 |
152
- | 1.4891 | 0.64 | 10000 | 1.4837 | 1.0 |
153
- | 1.4788 | 0.6464 | 10100 | 1.4795 | 1.0 |
154
- | 1.4705 | 0.6528 | 10200 | 1.4754 | 1.0 |
155
- | 1.469 | 0.6592 | 10300 | 1.4715 | 1.0 |
156
- | 1.4715 | 0.6656 | 10400 | 1.4677 | 1.0 |
157
- | 1.4615 | 0.672 | 10500 | 1.4640 | 1.0 |
158
- | 1.4611 | 0.6784 | 10600 | 1.4605 | 1.0 |
159
- | 1.4545 | 0.6848 | 10700 | 1.4570 | 1.0 |
160
- | 1.4506 | 0.6912 | 10800 | 1.4537 | 1.0 |
161
- | 1.4621 | 0.6976 | 10900 | 1.4506 | 1.0 |
162
- | 1.4437 | 0.704 | 11000 | 1.4475 | 1.0 |
163
- | 1.4524 | 0.7104 | 11100 | 1.4446 | 1.0 |
164
- | 1.4406 | 0.7168 | 11200 | 1.4418 | 1.0 |
165
- | 1.4373 | 0.7232 | 11300 | 1.4391 | 1.0 |
166
- | 1.432 | 0.7296 | 11400 | 1.4365 | 1.0 |
167
- | 1.4341 | 0.736 | 11500 | 1.4341 | 1.0 |
168
- | 1.4285 | 0.7424 | 11600 | 1.4317 | 1.0 |
169
- | 1.4269 | 0.7488 | 11700 | 1.4295 | 1.0 |
170
- | 1.4269 | 0.7552 | 11800 | 1.4273 | 1.0 |
171
- | 1.4282 | 0.7616 | 11900 | 1.4253 | 1.0 |
172
- | 1.4282 | 0.768 | 12000 | 1.4234 | 1.0 |
173
- | 1.4117 | 0.7744 | 12100 | 1.4216 | 1.0 |
174
- | 1.4186 | 0.7808 | 12200 | 1.4198 | 1.0 |
175
- | 1.4203 | 0.7872 | 12300 | 1.4182 | 1.0 |
176
- | 1.4168 | 0.7936 | 12400 | 1.4166 | 1.0 |
177
- | 1.412 | 0.8 | 12500 | 1.4152 | 1.0 |
178
- | 1.4106 | 0.8064 | 12600 | 1.4138 | 1.0 |
179
- | 1.4093 | 0.8128 | 12700 | 1.4125 | 1.0 |
180
- | 1.4176 | 0.8192 | 12800 | 1.4113 | 1.0 |
181
- | 1.4117 | 0.8256 | 12900 | 1.4102 | 1.0 |
182
- | 1.4093 | 0.832 | 13000 | 1.4092 | 1.0 |
183
- | 1.4083 | 0.8384 | 13100 | 1.4082 | 1.0 |
184
- | 1.404 | 0.8448 | 13200 | 1.4073 | 1.0 |
185
- | 1.4059 | 0.8512 | 13300 | 1.4065 | 1.0 |
186
- | 1.4038 | 0.8576 | 13400 | 1.4057 | 1.0 |
187
- | 1.4065 | 0.864 | 13500 | 1.4050 | 1.0 |
188
- | 1.4114 | 0.8704 | 13600 | 1.4044 | 1.0 |
189
- | 1.4019 | 0.8768 | 13700 | 1.4038 | 1.0 |
190
- | 1.4069 | 0.8832 | 13800 | 1.4033 | 1.0 |
191
- | 1.405 | 0.8896 | 13900 | 1.4028 | 1.0 |
192
- | 1.4067 | 0.896 | 14000 | 1.4024 | 1.0 |
193
- | 1.4035 | 0.9024 | 14100 | 1.4020 | 1.0 |
194
- | 1.4046 | 0.9088 | 14200 | 1.4017 | 1.0 |
195
- | 1.4036 | 0.9152 | 14300 | 1.4014 | 1.0 |
196
- | 1.3958 | 0.9216 | 14400 | 1.4012 | 1.0 |
197
- | 1.4011 | 0.928 | 14500 | 1.4010 | 1.0 |
198
- | 1.401 | 0.9344 | 14600 | 1.4008 | 1.0 |
199
- | 1.4022 | 0.9408 | 14700 | 1.4007 | 1.0 |
200
- | 1.3959 | 0.9472 | 14800 | 1.4005 | 1.0 |
201
- | 1.3951 | 0.9536 | 14900 | 1.4005 | 1.0 |
202
- | 1.393 | 0.96 | 15000 | 1.4004 | 1.0 |
203
- | 1.4074 | 0.9664 | 15100 | 1.4003 | 1.0 |
204
- | 1.3991 | 0.9728 | 15200 | 1.4003 | 1.0 |
205
- | 1.3929 | 0.9792 | 15300 | 1.4003 | 1.0 |
206
- | 1.4011 | 0.9856 | 15400 | 1.4003 | 1.0 |
207
- | 1.3928 | 0.992 | 15500 | 1.4003 | 1.0 |
208
- | 1.3956 | 0.9984 | 15600 | 1.4003 | 1.0 |
209
 
210
 
211
  ### Framework versions
 
16
 
17
  This model is a fine-tuned version of [](https://huggingface.co/) on an unknown dataset.
18
  It achieves the following results on the evaluation set:
19
+ - Loss: 1.6781
20
+ - Accuracy: 0.546
21
 
22
  ## Model description
23
 
 
49
 
50
  | Training Loss | Epoch | Step | Validation Loss | Accuracy |
51
  |:-------------:|:------:|:-----:|:---------------:|:--------:|
52
+ | No log | 0 | 0 | 2.6912 | 0.0 |
53
+ | 2.687 | 0.0064 | 100 | 2.6906 | 0.0 |
54
+ | 2.6844 | 0.0128 | 200 | 2.6886 | 0.0 |
55
+ | 2.6844 | 0.0192 | 300 | 2.6851 | 0.0 |
56
+ | 2.6854 | 0.0256 | 400 | 2.6796 | 0.0 |
57
+ | 2.6752 | 0.032 | 500 | 2.6708 | 0.0 |
58
+ | 2.6582 | 0.0384 | 600 | 2.6574 | 0.0 |
59
+ | 2.6385 | 0.0448 | 700 | 2.6386 | 0.0 |
60
+ | 2.6156 | 0.0512 | 800 | 2.6157 | 0.0 |
61
+ | 2.5971 | 0.0576 | 900 | 2.5925 | 0.0 |
62
+ | 2.5815 | 0.064 | 1000 | 2.5722 | 0.546 |
63
+ | 2.5522 | 0.0704 | 1100 | 2.5549 | 0.546 |
64
+ | 2.5304 | 0.0768 | 1200 | 2.5381 | 0.546 |
65
+ | 2.5259 | 0.0832 | 1300 | 2.5216 | 0.546 |
66
+ | 2.5204 | 0.0896 | 1400 | 2.5049 | 0.546 |
67
+ | 2.4877 | 0.096 | 1500 | 2.4880 | 0.546 |
68
+ | 2.4693 | 0.1024 | 1600 | 2.4704 | 0.546 |
69
+ | 2.458 | 0.1088 | 1700 | 2.4534 | 0.546 |
70
+ | 2.443 | 0.1152 | 1800 | 2.4378 | 0.546 |
71
+ | 2.4251 | 0.1216 | 1900 | 2.4235 | 0.546 |
72
+ | 2.4096 | 0.128 | 2000 | 2.4099 | 0.631 |
73
+ | 2.4012 | 0.1344 | 2100 | 2.3970 | 0.684 |
74
+ | 2.3777 | 0.1408 | 2200 | 2.3844 | 0.743 |
75
+ | 2.3667 | 0.1472 | 2300 | 2.3722 | 0.743 |
76
+ | 2.3664 | 0.1536 | 2400 | 2.3603 | 0.743 |
77
+ | 2.3481 | 0.16 | 2500 | 2.3486 | 0.743 |
78
+ | 2.3298 | 0.1664 | 2600 | 2.3369 | 0.743 |
79
+ | 2.3234 | 0.1728 | 2700 | 2.3255 | 0.743 |
80
+ | 2.3098 | 0.1792 | 2800 | 2.3141 | 0.743 |
81
+ | 2.3039 | 0.1856 | 2900 | 2.3029 | 0.743 |
82
+ | 2.2921 | 0.192 | 3000 | 2.2918 | 0.743 |
83
+ | 2.2831 | 0.1984 | 3100 | 2.2807 | 0.743 |
84
+ | 2.2629 | 0.2048 | 3200 | 2.2697 | 0.743 |
85
+ | 2.2523 | 0.2112 | 3300 | 2.2587 | 0.743 |
86
+ | 2.25 | 0.2176 | 3400 | 2.2478 | 0.743 |
87
+ | 2.2391 | 0.224 | 3500 | 2.2370 | 0.743 |
88
+ | 2.2191 | 0.2304 | 3600 | 2.2262 | 0.743 |
89
+ | 2.2187 | 0.2368 | 3700 | 2.2155 | 0.684 |
90
+ | 2.2022 | 0.2432 | 3800 | 2.2049 | 0.684 |
91
+ | 2.1899 | 0.2496 | 3900 | 2.1943 | 0.631 |
92
+ | 2.1841 | 0.256 | 4000 | 2.1838 | 0.631 |
93
+ | 2.171 | 0.2624 | 4100 | 2.1734 | 0.631 |
94
+ | 2.1758 | 0.2688 | 4200 | 2.1631 | 0.631 |
95
+ | 2.1515 | 0.2752 | 4300 | 2.1528 | 0.631 |
96
+ | 2.1436 | 0.2816 | 4400 | 2.1427 | 0.631 |
97
+ | 2.1283 | 0.288 | 4500 | 2.1326 | 0.631 |
98
+ | 2.1226 | 0.2944 | 4600 | 2.1226 | 0.631 |
99
+ | 2.1094 | 0.3008 | 4700 | 2.1127 | 0.583 |
100
+ | 2.0977 | 0.3072 | 4800 | 2.1029 | 0.583 |
101
+ | 2.0957 | 0.3136 | 4900 | 2.0932 | 0.546 |
102
+ | 2.0891 | 0.32 | 5000 | 2.0835 | 0.546 |
103
+ | 2.0747 | 0.3264 | 5100 | 2.0740 | 0.546 |
104
+ | 2.0697 | 0.3328 | 5200 | 2.0646 | 0.546 |
105
+ | 2.0644 | 0.3392 | 5300 | 2.0552 | 0.546 |
106
+ | 2.0489 | 0.3456 | 5400 | 2.0460 | 0.546 |
107
+ | 2.0377 | 0.352 | 5500 | 2.0369 | 0.546 |
108
+ | 2.0253 | 0.3584 | 5600 | 2.0278 | 0.546 |
109
+ | 2.0204 | 0.3648 | 5700 | 2.0189 | 0.546 |
110
+ | 2.0073 | 0.3712 | 5800 | 2.0101 | 0.546 |
111
+ | 2.0001 | 0.3776 | 5900 | 2.0014 | 0.546 |
112
+ | 2.0007 | 0.384 | 6000 | 1.9928 | 0.546 |
113
+ | 1.983 | 0.3904 | 6100 | 1.9843 | 0.546 |
114
+ | 1.9755 | 0.3968 | 6200 | 1.9760 | 0.546 |
115
+ | 1.9607 | 0.4032 | 6300 | 1.9678 | 0.546 |
116
+ | 1.966 | 0.4096 | 6400 | 1.9596 | 0.546 |
117
+ | 1.9511 | 0.416 | 6500 | 1.9516 | 0.546 |
118
+ | 1.9506 | 0.4224 | 6600 | 1.9437 | 0.546 |
119
+ | 1.933 | 0.4288 | 6700 | 1.9360 | 0.546 |
120
+ | 1.9257 | 0.4352 | 6800 | 1.9283 | 0.546 |
121
+ | 1.9197 | 0.4416 | 6900 | 1.9208 | 0.546 |
122
+ | 1.9103 | 0.448 | 7000 | 1.9134 | 0.546 |
123
+ | 1.9039 | 0.4544 | 7100 | 1.9062 | 0.546 |
124
+ | 1.8954 | 0.4608 | 7200 | 1.8990 | 0.546 |
125
+ | 1.8918 | 0.4672 | 7300 | 1.8919 | 0.546 |
126
+ | 1.8791 | 0.4736 | 7400 | 1.8851 | 0.546 |
127
+ | 1.8713 | 0.48 | 7500 | 1.8783 | 0.546 |
128
+ | 1.869 | 0.4864 | 7600 | 1.8716 | 0.546 |
129
+ | 1.8617 | 0.4928 | 7700 | 1.8651 | 0.546 |
130
+ | 1.8579 | 0.4992 | 7800 | 1.8587 | 0.546 |
131
+ | 1.8546 | 0.5056 | 7900 | 1.8524 | 0.546 |
132
+ | 1.8395 | 0.512 | 8000 | 1.8462 | 0.546 |
133
+ | 1.8351 | 0.5184 | 8100 | 1.8402 | 0.546 |
134
+ | 1.8368 | 0.5248 | 8200 | 1.8343 | 0.546 |
135
+ | 1.8282 | 0.5312 | 8300 | 1.8285 | 0.546 |
136
+ | 1.8264 | 0.5376 | 8400 | 1.8229 | 0.546 |
137
+ | 1.8133 | 0.544 | 8500 | 1.8174 | 0.546 |
138
+ | 1.8156 | 0.5504 | 8600 | 1.8120 | 0.546 |
139
+ | 1.8119 | 0.5568 | 8700 | 1.8067 | 0.546 |
140
+ | 1.805 | 0.5632 | 8800 | 1.8016 | 0.546 |
141
+ | 1.7968 | 0.5696 | 8900 | 1.7965 | 0.546 |
142
+ | 1.7948 | 0.576 | 9000 | 1.7917 | 0.546 |
143
+ | 1.7882 | 0.5824 | 9100 | 1.7869 | 0.546 |
144
+ | 1.7901 | 0.5888 | 9200 | 1.7822 | 0.546 |
145
+ | 1.7753 | 0.5952 | 9300 | 1.7777 | 0.546 |
146
+ | 1.7721 | 0.6016 | 9400 | 1.7733 | 0.546 |
147
+ | 1.7653 | 0.608 | 9500 | 1.7690 | 0.546 |
148
+ | 1.767 | 0.6144 | 9600 | 1.7649 | 0.546 |
149
+ | 1.7554 | 0.6208 | 9700 | 1.7608 | 0.546 |
150
+ | 1.7674 | 0.6272 | 9800 | 1.7569 | 0.546 |
151
+ | 1.751 | 0.6336 | 9900 | 1.7531 | 0.546 |
152
+ | 1.7567 | 0.64 | 10000 | 1.7494 | 0.546 |
153
+ | 1.745 | 0.6464 | 10100 | 1.7458 | 0.546 |
154
+ | 1.7365 | 0.6528 | 10200 | 1.7424 | 0.546 |
155
+ | 1.7361 | 0.6592 | 10300 | 1.7390 | 0.546 |
156
+ | 1.7411 | 0.6656 | 10400 | 1.7358 | 0.546 |
157
+ | 1.73 | 0.672 | 10500 | 1.7327 | 0.546 |
158
+ | 1.7308 | 0.6784 | 10600 | 1.7297 | 0.546 |
159
+ | 1.7237 | 0.6848 | 10700 | 1.7268 | 0.546 |
160
+ | 1.7205 | 0.6912 | 10800 | 1.7239 | 0.546 |
161
+ | 1.7357 | 0.6976 | 10900 | 1.7212 | 0.546 |
162
+ | 1.7142 | 0.704 | 11000 | 1.7186 | 0.546 |
163
+ | 1.7261 | 0.7104 | 11100 | 1.7161 | 0.546 |
164
+ | 1.7127 | 0.7168 | 11200 | 1.7137 | 0.546 |
165
+ | 1.7097 | 0.7232 | 11300 | 1.7114 | 0.546 |
166
+ | 1.7037 | 0.7296 | 11400 | 1.7092 | 0.546 |
167
+ | 1.7071 | 0.736 | 11500 | 1.7071 | 0.546 |
168
+ | 1.7015 | 0.7424 | 11600 | 1.7051 | 0.546 |
169
+ | 1.7005 | 0.7488 | 11700 | 1.7032 | 0.546 |
170
+ | 1.7013 | 0.7552 | 11800 | 1.7014 | 0.546 |
171
+ | 1.7037 | 0.7616 | 11900 | 1.6996 | 0.546 |
172
+ | 1.704 | 0.768 | 12000 | 1.6980 | 0.546 |
173
+ | 1.6848 | 0.7744 | 12100 | 1.6964 | 0.546 |
174
+ | 1.6937 | 0.7808 | 12200 | 1.6949 | 0.546 |
175
+ | 1.6961 | 0.7872 | 12300 | 1.6935 | 0.546 |
176
+ | 1.6927 | 0.7936 | 12400 | 1.6922 | 0.546 |
177
+ | 1.6875 | 0.8 | 12500 | 1.6909 | 0.546 |
178
+ | 1.6861 | 0.8064 | 12600 | 1.6897 | 0.546 |
179
+ | 1.6852 | 0.8128 | 12700 | 1.6886 | 0.546 |
180
+ | 1.6952 | 0.8192 | 12800 | 1.6876 | 0.546 |
181
+ | 1.6886 | 0.8256 | 12900 | 1.6866 | 0.546 |
182
+ | 1.6861 | 0.832 | 13000 | 1.6858 | 0.546 |
183
+ | 1.6854 | 0.8384 | 13100 | 1.6849 | 0.546 |
184
+ | 1.6805 | 0.8448 | 13200 | 1.6841 | 0.546 |
185
+ | 1.6831 | 0.8512 | 13300 | 1.6834 | 0.546 |
186
+ | 1.6805 | 0.8576 | 13400 | 1.6828 | 0.546 |
187
+ | 1.6843 | 0.864 | 13500 | 1.6822 | 0.546 |
188
+ | 1.6899 | 0.8704 | 13600 | 1.6816 | 0.546 |
189
+ | 1.6789 | 0.8768 | 13700 | 1.6811 | 0.546 |
190
+ | 1.6852 | 0.8832 | 13800 | 1.6807 | 0.546 |
191
+ | 1.683 | 0.8896 | 13900 | 1.6803 | 0.546 |
192
+ | 1.685 | 0.896 | 14000 | 1.6799 | 0.546 |
193
+ | 1.6816 | 0.9024 | 14100 | 1.6796 | 0.546 |
194
+ | 1.6831 | 0.9088 | 14200 | 1.6793 | 0.546 |
195
+ | 1.6819 | 0.9152 | 14300 | 1.6791 | 0.546 |
196
+ | 1.6726 | 0.9216 | 14400 | 1.6789 | 0.546 |
197
+ | 1.6788 | 0.928 | 14500 | 1.6787 | 0.546 |
198
+ | 1.679 | 0.9344 | 14600 | 1.6786 | 0.546 |
199
+ | 1.6809 | 0.9408 | 14700 | 1.6785 | 0.546 |
200
+ | 1.6732 | 0.9472 | 14800 | 1.6784 | 0.546 |
201
+ | 1.6722 | 0.9536 | 14900 | 1.6783 | 0.546 |
202
+ | 1.6701 | 0.96 | 15000 | 1.6782 | 0.546 |
203
+ | 1.6866 | 0.9664 | 15100 | 1.6782 | 0.546 |
204
+ | 1.6767 | 0.9728 | 15200 | 1.6781 | 0.546 |
205
+ | 1.6699 | 0.9792 | 15300 | 1.6781 | 0.546 |
206
+ | 1.6794 | 0.9856 | 15400 | 1.6781 | 0.546 |
207
+ | 1.6697 | 0.992 | 15500 | 1.6781 | 0.546 |
208
+ | 1.6732 | 0.9984 | 15600 | 1.6781 | 0.546 |
209
 
210
 
211
  ### Framework versions
config.json CHANGED
@@ -5,9 +5,9 @@
5
  "bias": true,
6
  "block_size": 256,
7
  "dropout": 0.0,
8
- "mlp_dim": 4,
9
  "model_type": "nanogpt",
10
- "n_embd": 8,
11
  "n_head": 1,
12
  "n_layer": 1,
13
  "nonlinearity": "RELU",
 
5
  "bias": true,
6
  "block_size": 256,
7
  "dropout": 0.0,
8
+ "mlp_dim": 1,
9
  "model_type": "nanogpt",
10
+ "n_embd": 6,
11
  "n_head": 1,
12
  "n_layer": 1,
13
  "nonlinearity": "RELU",
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:07c052f4b392e4f3dd47eeca68ba7ea6f3e27831d0e5d44aac23da9d2fbea621
3
- size 5872
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3aade9790166feb8dc654c0ad7eb06b557cb56edf72334f41e2d38715f094459
3
+ size 3240
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:18639df385ca1d08af21c0a140152f17a00f2da79a38258c2f965a39f736653f
3
  size 5240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b8771e94380d5a22f9f2c18550eb28573db1ae00f28340556c7fc63571dccfc
3
  size 5240