Model save
Browse files
- README.md +205 -65
- config.json +1 -1
- model.safetensors +2 -2
- training_args.bin +1 -1
- vocab.json +0 -0
- vocab_fix age.txt +0 -0
README.md
CHANGED
|
@@ -13,8 +13,8 @@ should probably proofread and complete it, then remove this comment. -->
|
|
| 13 |
|
| 14 |
This model is a fine-tuned version of [](https://huggingface.co/) on the None dataset.
|
| 15 |
It achieves the following results on the evaluation set:
|
| 16 |
-
- Loss:
|
| 17 |
-
- Num Input Tokens Seen:
|
| 18 |
|
| 19 |
## Model description
|
| 20 |
|
|
@@ -40,72 +40,212 @@ The following hyperparameters were used during training:
|
|
| 40 |
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
|
| 41 |
- lr_scheduler_type: linear
|
| 42 |
- lr_scheduler_warmup_ratio: 0.1
|
| 43 |
-
- training_steps:
|
| 44 |
|
| 45 |
### Training results
|
| 46 |
|
| 47 |
-
| Training Loss | Epoch | Step
|
| 48 |
-
|
| 49 |
-
|
|
| 50 |
-
|
|
| 51 |
-
|
|
| 52 |
-
|
|
| 53 |
-
|
|
| 54 |
-
|
|
| 55 |
-
|
|
| 56 |
-
|
|
| 57 |
-
|
|
| 58 |
-
|
|
| 59 |
-
| 1.
|
| 60 |
-
| 1.
|
| 61 |
-
| 1.
|
| 62 |
-
| 1.
|
| 63 |
-
| 1.
|
| 64 |
-
| 1.
|
| 65 |
-
| 1.
|
| 66 |
-
| 1.
|
| 67 |
-
| 1.
|
| 68 |
-
| 1.
|
| 69 |
-
| 1.
|
| 70 |
-
| 1.
|
| 71 |
-
| 1.
|
| 72 |
-
| 1.
|
| 73 |
-
| 1.
|
| 74 |
-
| 1.
|
| 75 |
-
| 1.
|
| 76 |
-
| 1.
|
| 77 |
-
| 1.
|
| 78 |
-
| 1.
|
| 79 |
-
| 1.
|
| 80 |
-
| 1.
|
| 81 |
-
| 1.
|
| 82 |
-
| 1.
|
| 83 |
-
| 1.
|
| 84 |
-
| 1.
|
| 85 |
-
| 1.
|
| 86 |
-
| 1.
|
| 87 |
-
| 1.
|
| 88 |
-
| 1.
|
| 89 |
-
| 1.
|
| 90 |
-
| 1.
|
| 91 |
-
|
|
| 92 |
-
|
|
| 93 |
-
|
|
| 94 |
-
| 1.
|
| 95 |
-
| 1.
|
| 96 |
-
| 1.
|
| 97 |
-
| 1.
|
| 98 |
-
|
|
| 99 |
-
|
|
| 100 |
-
| 1.
|
| 101 |
-
|
|
| 102 |
-
|
|
| 103 |
-
| 1.
|
| 104 |
-
|
|
| 105 |
-
|
|
| 106 |
-
|
|
| 107 |
-
|
|
| 108 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 109 |
|
| 110 |
|
| 111 |
### Framework versions
|
|
|
|
| 13 |
|
| 14 |
This model is a fine-tuned version of [](https://huggingface.co/) on the None dataset.
|
| 15 |
It achieves the following results on the evaluation set:
|
| 16 |
+
- Loss: 1.0120
|
| 17 |
+
- Num Input Tokens Seen: 8192000000
|
| 18 |
|
| 19 |
## Model description
|
| 20 |
|
|
|
|
| 40 |
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
|
| 41 |
- lr_scheduler_type: linear
|
| 42 |
- lr_scheduler_warmup_ratio: 0.1
|
| 43 |
+
- training_steps: 1000000
|
| 44 |
|
| 45 |
### Training results
|
| 46 |
|
| 47 |
+
| Training Loss | Epoch | Step | Validation Loss | Input Tokens Seen |
|
| 48 |
+
|:-------------:|:-----:|:-------:|:---------------:|:-----------------:|
|
| 49 |
+
| 6.663 | 0.25 | 5000 | 6.6552 | 40960000 |
|
| 50 |
+
| 5.8854 | 0.49 | 10000 | 5.8635 | 81920000 |
|
| 51 |
+
| 5.5478 | 0.74 | 15000 | 5.4674 | 122880000 |
|
| 52 |
+
| 5.1915 | 0.98 | 20000 | 5.1096 | 163840000 |
|
| 53 |
+
| 4.4684 | 1.23 | 25000 | 4.4560 | 204800000 |
|
| 54 |
+
| 3.7495 | 1.47 | 30000 | 3.6515 | 245760000 |
|
| 55 |
+
| 3.3969 | 1.72 | 35000 | 3.3215 | 286720000 |
|
| 56 |
+
| 3.2418 | 1.97 | 40000 | 3.0943 | 327680000 |
|
| 57 |
+
| 2.7464 | 2.21 | 45000 | 2.5451 | 368640000 |
|
| 58 |
+
| 2.2447 | 2.46 | 50000 | 2.1026 | 409600000 |
|
| 59 |
+
| 1.8854 | 2.7 | 55000 | 1.8477 | 450560000 |
|
| 60 |
+
| 1.7938 | 2.95 | 60000 | 1.6798 | 491520000 |
|
| 61 |
+
| 1.6738 | 3.2 | 65000 | 1.5679 | 532480000 |
|
| 62 |
+
| 1.6701 | 3.44 | 70000 | 1.5054 | 573440000 |
|
| 63 |
+
| 1.485 | 3.69 | 75000 | 1.4480 | 614400000 |
|
| 64 |
+
| 1.5258 | 3.93 | 80000 | 1.4007 | 655360000 |
|
| 65 |
+
| 1.4916 | 4.18 | 85000 | 1.3906 | 696320000 |
|
| 66 |
+
| 1.4113 | 4.42 | 90000 | 1.3684 | 737280000 |
|
| 67 |
+
| 1.4387 | 4.67 | 95000 | 1.3493 | 778240000 |
|
| 68 |
+
| 1.388 | 4.92 | 100000 | 1.3386 | 819200000 |
|
| 69 |
+
| 1.346 | 5.16 | 105000 | 1.3352 | 860160000 |
|
| 70 |
+
| 1.3504 | 5.41 | 110000 | 1.3294 | 901120000 |
|
| 71 |
+
| 1.3432 | 5.65 | 115000 | 1.3168 | 942080000 |
|
| 72 |
+
| 1.2821 | 5.9 | 120000 | 1.3041 | 983040000 |
|
| 73 |
+
| 1.2748 | 6.15 | 125000 | 1.2871 | 1024000000 |
|
| 74 |
+
| 1.3076 | 6.39 | 130000 | 1.2783 | 1064960000 |
|
| 75 |
+
| 1.3397 | 6.64 | 135000 | 1.2690 | 1105920000 |
|
| 76 |
+
| 1.301 | 6.88 | 140000 | 1.2653 | 1146880000 |
|
| 77 |
+
| 1.2416 | 7.13 | 145000 | 1.2584 | 1187840000 |
|
| 78 |
+
| 1.2513 | 7.37 | 150000 | 1.2515 | 1228800000 |
|
| 79 |
+
| 1.2618 | 7.62 | 155000 | 1.2415 | 1269760000 |
|
| 80 |
+
| 1.2366 | 7.87 | 160000 | 1.2399 | 1310720000 |
|
| 81 |
+
| 1.2584 | 8.11 | 165000 | 1.2245 | 1351680000 |
|
| 82 |
+
| 1.1951 | 8.36 | 170000 | 1.2225 | 1392640000 |
|
| 83 |
+
| 1.2576 | 8.6 | 175000 | 1.2286 | 1433600000 |
|
| 84 |
+
| 1.278 | 8.85 | 180000 | 1.2140 | 1474560000 |
|
| 85 |
+
| 1.1975 | 9.09 | 185000 | 1.2103 | 1515520000 |
|
| 86 |
+
| 1.1596 | 9.34 | 190000 | 1.2052 | 1556480000 |
|
| 87 |
+
| 1.2061 | 9.59 | 195000 | 1.2034 | 1597440000 |
|
| 88 |
+
| 1.1677 | 9.83 | 200000 | 1.2079 | 1638400000 |
|
| 89 |
+
| 1.1977 | 10.08 | 205000 | 1.1966 | 1679360000 |
|
| 90 |
+
| 1.1448 | 10.32 | 210000 | 1.2031 | 1720320000 |
|
| 91 |
+
| 1.1119 | 10.57 | 215000 | 1.1866 | 1761280000 |
|
| 92 |
+
| 1.1695 | 10.82 | 220000 | 1.1823 | 1802240000 |
|
| 93 |
+
| 1.0998 | 11.06 | 225000 | 1.1874 | 1843200000 |
|
| 94 |
+
| 1.1157 | 11.31 | 230000 | 1.1791 | 1884160000 |
|
| 95 |
+
| 1.191 | 11.55 | 235000 | 1.1802 | 1925120000 |
|
| 96 |
+
| 1.1884 | 11.8 | 240000 | 1.1706 | 1966080000 |
|
| 97 |
+
| 1.1723 | 12.04 | 245000 | 1.1750 | 2007040000 |
|
| 98 |
+
| 1.1576 | 12.29 | 250000 | 1.1720 | 2048000000 |
|
| 99 |
+
| 1.1847 | 12.54 | 255000 | 1.1596 | 2088960000 |
|
| 100 |
+
| 1.1229 | 12.78 | 260000 | 1.1594 | 2129920000 |
|
| 101 |
+
| 1.1683 | 13.03 | 265000 | 1.1550 | 2170880000 |
|
| 102 |
+
| 1.1718 | 13.27 | 270000 | 1.1511 | 2211840000 |
|
| 103 |
+
| 1.1374 | 13.52 | 275000 | 1.1531 | 2252800000 |
|
| 104 |
+
| 1.1199 | 13.77 | 280000 | 1.1615 | 2293760000 |
|
| 105 |
+
| 1.1275 | 14.01 | 285000 | 1.1555 | 2334720000 |
|
| 106 |
+
| 1.1267 | 14.26 | 290000 | 1.1442 | 2375680000 |
|
| 107 |
+
| 1.1603 | 14.5 | 295000 | 1.1426 | 2416640000 |
|
| 108 |
+
| 1.1739 | 14.75 | 300000 | 1.1443 | 2457600000 |
|
| 109 |
+
| 1.1022 | 14.99 | 305000 | 1.1438 | 2498560000 |
|
| 110 |
+
| 1.1225 | 15.24 | 310000 | 1.1323 | 2539520000 |
|
| 111 |
+
| 1.1244 | 15.49 | 315000 | 1.1389 | 2580480000 |
|
| 112 |
+
| 1.1358 | 15.73 | 320000 | 1.1377 | 2621440000 |
|
| 113 |
+
| 1.1499 | 15.98 | 325000 | 1.1318 | 2662400000 |
|
| 114 |
+
| 1.1266 | 16.22 | 330000 | 1.1313 | 2703360000 |
|
| 115 |
+
| 1.1604 | 16.47 | 335000 | 1.1264 | 2744320000 |
|
| 116 |
+
| 1.0391 | 16.72 | 340000 | 1.1364 | 2785280000 |
|
| 117 |
+
| 1.1526 | 16.96 | 345000 | 1.1289 | 2826240000 |
|
| 118 |
+
| 1.1299 | 17.21 | 350000 | 1.1259 | 2867200000 |
|
| 119 |
+
| 1.1118 | 17.45 | 355000 | 1.1238 | 2908160000 |
|
| 120 |
+
| 1.1049 | 17.7 | 360000 | 1.1193 | 2949120000 |
|
| 121 |
+
| 1.1336 | 17.94 | 365000 | 1.1211 | 2990080000 |
|
| 122 |
+
| 1.0504 | 18.19 | 370000 | 1.1218 | 3031040000 |
|
| 123 |
+
| 1.1003 | 18.44 | 375000 | 1.1174 | 3072000000 |
|
| 124 |
+
| 1.1284 | 18.68 | 380000 | 1.1164 | 3112960000 |
|
| 125 |
+
| 1.1408 | 18.93 | 385000 | 1.1115 | 3153920000 |
|
| 126 |
+
| 1.0548 | 19.17 | 390000 | 1.1112 | 3194880000 |
|
| 127 |
+
| 1.1045 | 19.42 | 395000 | 1.1102 | 3235840000 |
|
| 128 |
+
| 1.0618 | 19.66 | 400000 | 1.1075 | 3276800000 |
|
| 129 |
+
| 1.0953 | 19.91 | 405000 | 1.1070 | 3317760000 |
|
| 130 |
+
| 1.1543 | 20.16 | 410000 | 1.1071 | 3358720000 |
|
| 131 |
+
| 1.1212 | 20.4 | 415000 | 1.1032 | 3399680000 |
|
| 132 |
+
| 1.0678 | 20.65 | 420000 | 1.1007 | 3440640000 |
|
| 133 |
+
| 1.0646 | 20.89 | 425000 | 1.0982 | 3481600000 |
|
| 134 |
+
| 1.1047 | 21.14 | 430000 | 1.1022 | 3522560000 |
|
| 135 |
+
| 1.092 | 21.39 | 435000 | 1.0978 | 3563520000 |
|
| 136 |
+
| 1.0619 | 21.63 | 440000 | 1.1075 | 3604480000 |
|
| 137 |
+
| 1.0233 | 21.88 | 445000 | 1.0954 | 3645440000 |
|
| 138 |
+
| 1.0962 | 22.12 | 450000 | 1.0891 | 3686400000 |
|
| 139 |
+
| 1.0733 | 22.37 | 455000 | 1.0932 | 3727360000 |
|
| 140 |
+
| 1.1267 | 22.61 | 460000 | 1.0935 | 3768320000 |
|
| 141 |
+
| 1.053 | 22.86 | 465000 | 1.0904 | 3809280000 |
|
| 142 |
+
| 1.0558 | 23.11 | 470000 | 1.0901 | 3850240000 |
|
| 143 |
+
| 1.0324 | 23.35 | 475000 | 1.0955 | 3891200000 |
|
| 144 |
+
| 1.0651 | 23.6 | 480000 | 1.0891 | 3932160000 |
|
| 145 |
+
| 1.0774 | 23.84 | 485000 | 1.0901 | 3973120000 |
|
| 146 |
+
| 1.0929 | 24.09 | 490000 | 1.0833 | 4014080000 |
|
| 147 |
+
| 1.0516 | 24.34 | 495000 | 1.0805 | 4055040000 |
|
| 148 |
+
| 1.0482 | 24.58 | 500000 | 1.0846 | 4096000000 |
|
| 149 |
+
| 1.1004 | 24.83 | 505000 | 1.0802 | 4136960000 |
|
| 150 |
+
| 1.1119 | 25.07 | 510000 | 1.0765 | 4177920000 |
|
| 151 |
+
| 1.0799 | 25.32 | 515000 | 1.0843 | 4218880000 |
|
| 152 |
+
| 1.0794 | 25.56 | 520000 | 1.0801 | 4259840000 |
|
| 153 |
+
| 1.0681 | 25.81 | 525000 | 1.0785 | 4300800000 |
|
| 154 |
+
| 1.0183 | 26.06 | 530000 | 1.0760 | 4341760000 |
|
| 155 |
+
| 1.0791 | 26.3 | 535000 | 1.0722 | 4382720000 |
|
| 156 |
+
| 1.0285 | 26.55 | 540000 | 1.0754 | 4423680000 |
|
| 157 |
+
| 1.0474 | 26.79 | 545000 | 1.0688 | 4464640000 |
|
| 158 |
+
| 1.0258 | 27.04 | 550000 | 1.0755 | 4505600000 |
|
| 159 |
+
| 1.0374 | 27.28 | 555000 | 1.0677 | 4546560000 |
|
| 160 |
+
| 1.0385 | 27.53 | 560000 | 1.0698 | 4587520000 |
|
| 161 |
+
| 1.1287 | 27.78 | 565000 | 1.0692 | 4628480000 |
|
| 162 |
+
| 1.0774 | 28.02 | 570000 | 1.0671 | 4669440000 |
|
| 163 |
+
| 1.0264 | 28.27 | 575000 | 1.0692 | 4710400000 |
|
| 164 |
+
| 1.0452 | 28.51 | 580000 | 1.0676 | 4751360000 |
|
| 165 |
+
| 1.1144 | 28.76 | 585000 | 1.0663 | 4792320000 |
|
| 166 |
+
| 1.0485 | 29.01 | 590000 | 1.0658 | 4833280000 |
|
| 167 |
+
| 1.0556 | 29.25 | 595000 | 1.0651 | 4874240000 |
|
| 168 |
+
| 0.996 | 29.5 | 600000 | 1.0616 | 4915200000 |
|
| 169 |
+
| 1.0448 | 29.74 | 605000 | 1.0665 | 4956160000 |
|
| 170 |
+
| 1.0094 | 29.99 | 610000 | 1.0624 | 4997120000 |
|
| 171 |
+
| 1.0799 | 30.23 | 615000 | 1.0605 | 5038080000 |
|
| 172 |
+
| 0.9995 | 30.48 | 620000 | 1.0609 | 5079040000 |
|
| 173 |
+
| 1.0429 | 30.73 | 625000 | 1.0616 | 5120000000 |
|
| 174 |
+
| 0.9966 | 30.97 | 630000 | 1.0600 | 5160960000 |
|
| 175 |
+
| 1.0508 | 31.22 | 635000 | 1.0576 | 5201920000 |
|
| 176 |
+
| 0.9879 | 31.46 | 640000 | 1.0554 | 5242880000 |
|
| 177 |
+
| 1.0473 | 31.71 | 645000 | 1.0581 | 5283840000 |
|
| 178 |
+
| 1.0364 | 31.96 | 650000 | 1.0529 | 5324800000 |
|
| 179 |
+
| 1.0667 | 32.2 | 655000 | 1.0567 | 5365760000 |
|
| 180 |
+
| 1.0108 | 32.45 | 660000 | 1.0517 | 5406720000 |
|
| 181 |
+
| 0.9932 | 32.69 | 665000 | 1.0550 | 5447680000 |
|
| 182 |
+
| 0.9917 | 32.94 | 670000 | 1.0482 | 5488640000 |
|
| 183 |
+
| 1.0368 | 33.18 | 675000 | 1.0519 | 5529600000 |
|
| 184 |
+
| 1.0942 | 33.43 | 680000 | 1.0448 | 5570560000 |
|
| 185 |
+
| 1.0851 | 33.68 | 685000 | 1.0484 | 5611520000 |
|
| 186 |
+
| 1.0568 | 33.92 | 690000 | 1.0460 | 5652480000 |
|
| 187 |
+
| 1.0175 | 34.17 | 695000 | 1.0484 | 5693440000 |
|
| 188 |
+
| 1.0051 | 34.41 | 700000 | 1.0480 | 5734400000 |
|
| 189 |
+
| 1.0143 | 34.66 | 705000 | 1.0443 | 5775360000 |
|
| 190 |
+
| 1.043 | 34.9 | 710000 | 1.0429 | 5816320000 |
|
| 191 |
+
| 1.0354 | 35.15 | 715000 | 1.0425 | 5857280000 |
|
| 192 |
+
| 1.0394 | 35.4 | 720000 | 1.0442 | 5898240000 |
|
| 193 |
+
| 1.0074 | 35.64 | 725000 | 1.0417 | 5939200000 |
|
| 194 |
+
| 1.0632 | 35.89 | 730000 | 1.0446 | 5980160000 |
|
| 195 |
+
| 1.0117 | 36.13 | 735000 | 1.0428 | 6021120000 |
|
| 196 |
+
| 1.0202 | 36.38 | 740000 | 1.0403 | 6062080000 |
|
| 197 |
+
| 1.0315 | 36.63 | 745000 | 1.0385 | 6103040000 |
|
| 198 |
+
| 0.9871 | 36.87 | 750000 | 1.0380 | 6144000000 |
|
| 199 |
+
| 0.9502 | 37.12 | 755000 | 1.0351 | 6184960000 |
|
| 200 |
+
| 1.0433 | 37.36 | 760000 | 1.0398 | 6225920000 |
|
| 201 |
+
| 1.0148 | 37.61 | 765000 | 1.0364 | 6266880000 |
|
| 202 |
+
| 0.9534 | 37.85 | 770000 | 1.0380 | 6307840000 |
|
| 203 |
+
| 0.9569 | 38.1 | 775000 | 1.0334 | 6348800000 |
|
| 204 |
+
| 1.0426 | 38.35 | 780000 | 1.0338 | 6389760000 |
|
| 205 |
+
| 0.9923 | 38.59 | 785000 | 1.0335 | 6430720000 |
|
| 206 |
+
| 1.0107 | 38.84 | 790000 | 1.0325 | 6471680000 |
|
| 207 |
+
| 1.0252 | 39.08 | 795000 | 1.0362 | 6512640000 |
|
| 208 |
+
| 1.0201 | 39.33 | 800000 | 1.0332 | 6553600000 |
|
| 209 |
+
| 1.0066 | 39.58 | 805000 | 1.0295 | 6594560000 |
|
| 210 |
+
| 0.9832 | 39.82 | 810000 | 1.0325 | 6635520000 |
|
| 211 |
+
| 0.9948 | 40.07 | 815000 | 1.0338 | 6676480000 |
|
| 212 |
+
| 1.0046 | 40.31 | 820000 | 1.0299 | 6717440000 |
|
| 213 |
+
| 1.0472 | 40.56 | 825000 | 1.0308 | 6758400000 |
|
| 214 |
+
| 1.0781 | 40.8 | 830000 | 1.0276 | 6799360000 |
|
| 215 |
+
| 0.9824 | 41.05 | 835000 | 1.0230 | 6840320000 |
|
| 216 |
+
| 0.9976 | 41.3 | 840000 | 1.0262 | 6881280000 |
|
| 217 |
+
| 0.9951 | 41.54 | 845000 | 1.0228 | 6922240000 |
|
| 218 |
+
| 1.0125 | 41.79 | 850000 | 1.0277 | 6963200000 |
|
| 219 |
+
| 0.973 | 42.03 | 855000 | 1.0245 | 7004160000 |
|
| 220 |
+
| 0.9853 | 42.28 | 860000 | 1.0284 | 7045120000 |
|
| 221 |
+
| 1.0991 | 42.52 | 865000 | 1.0244 | 7086080000 |
|
| 222 |
+
| 1.0388 | 42.77 | 870000 | 1.0249 | 7127040000 |
|
| 223 |
+
| 0.9513 | 43.02 | 875000 | 1.0256 | 7168000000 |
|
| 224 |
+
| 0.9948 | 43.26 | 880000 | 1.0250 | 7208960000 |
|
| 225 |
+
| 1.0032 | 43.51 | 885000 | 1.0180 | 7249920000 |
|
| 226 |
+
| 0.9846 | 43.75 | 890000 | 1.0231 | 7290880000 |
|
| 227 |
+
| 0.9591 | 44.0 | 895000 | 1.0202 | 7331840000 |
|
| 228 |
+
| 0.9872 | 44.25 | 900000 | 1.0186 | 7372800000 |
|
| 229 |
+
| 0.9491 | 44.49 | 905000 | 1.0202 | 7413760000 |
|
| 230 |
+
| 0.9904 | 44.74 | 910000 | 1.0201 | 7454720000 |
|
| 231 |
+
| 1.0316 | 44.98 | 915000 | 1.0207 | 7495680000 |
|
| 232 |
+
| 0.9535 | 45.23 | 920000 | 1.0146 | 7536640000 |
|
| 233 |
+
| 0.9543 | 45.47 | 925000 | 1.0189 | 7577600000 |
|
| 234 |
+
| 0.9583 | 45.72 | 930000 | 1.0172 | 7618560000 |
|
| 235 |
+
| 1.0065 | 45.97 | 935000 | 1.0179 | 7659520000 |
|
| 236 |
+
| 0.9711 | 46.21 | 940000 | 1.0181 | 7700480000 |
|
| 237 |
+
| 0.9815 | 46.46 | 945000 | 1.0152 | 7741440000 |
|
| 238 |
+
| 1.0238 | 46.7 | 950000 | 1.0128 | 7782400000 |
|
| 239 |
+
| 0.9362 | 46.95 | 955000 | 1.0136 | 7823360000 |
|
| 240 |
+
| 1.0079 | 47.2 | 960000 | 1.0152 | 7864320000 |
|
| 241 |
+
| 0.9533 | 47.44 | 965000 | 1.0155 | 7905280000 |
|
| 242 |
+
| 0.9806 | 47.69 | 970000 | 1.0149 | 7946240000 |
|
| 243 |
+
| 0.9816 | 47.93 | 975000 | 1.0132 | 7987200000 |
|
| 244 |
+
| 0.9743 | 48.18 | 980000 | 1.0160 | 8028160000 |
|
| 245 |
+
| 0.9028 | 48.42 | 985000 | 1.0148 | 8069120000 |
|
| 246 |
+
| 0.957 | 48.67 | 990000 | 1.0147 | 8110080000 |
|
| 247 |
+
| 0.9769 | 48.92 | 995000 | 1.0142 | 8151040000 |
|
| 248 |
+
| 1.0092 | 49.16 | 1000000 | 1.0120 | 8192000000 |
|
| 249 |
|
| 250 |
|
| 251 |
### Framework versions
|
config.json
CHANGED
|
@@ -20,5 +20,5 @@
|
|
| 20 |
"transformers_version": "4.36.2",
|
| 21 |
"type_vocab_size": 2,
|
| 22 |
"use_cache": true,
|
| 23 |
-
"vocab_size":
|
| 24 |
}
|
|
|
|
| 20 |
"transformers_version": "4.36.2",
|
| 21 |
"type_vocab_size": 2,
|
| 22 |
"use_cache": true,
|
| 23 |
+
"vocab_size": 26786
|
| 24 |
}
|
model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b47fee46269708b791ddedf1df1672c870a04af1ae99fa7c438308ec312fac39
|
| 3 |
+
size 426588944
|
training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4728
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:75645c04af175bbdbfb46069d6a5d059dd0790f9b9924e8df63339a4369e6239
|
| 3 |
size 4728
|
vocab.json
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
vocab_fix age.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|