Premier modèle d'architecture gemma from scratch
Browse files- README.md +1 -1
- tokenizer.json +47 -10
README.md
CHANGED
|
@@ -39,7 +39,7 @@ The following hyperparameters were used during training:
|
|
| 39 |
- total_train_batch_size: 32
|
| 40 |
- optimizer: Use OptimizerNames.ADAMW_TORCH_FUSED with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
|
| 41 |
- lr_scheduler_type: linear
|
| 42 |
-
- num_epochs:
|
| 43 |
- mixed_precision_training: Native AMP
|
| 44 |
|
| 45 |
### Training results
|
|
|
|
| 39 |
- total_train_batch_size: 32
|
| 40 |
- optimizer: Use OptimizerNames.ADAMW_TORCH_FUSED with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
|
| 41 |
- lr_scheduler_type: linear
|
| 42 |
+
- num_epochs: 3
|
| 43 |
- mixed_precision_training: Native AMP
|
| 44 |
|
| 45 |
### Training results
|
tokenizer.json
CHANGED
|
@@ -150,16 +150,53 @@
|
|
| 150 |
"<mask>": 4,
|
| 151 |
"Ever:": 5,
|
| 152 |
"Small:": 6,
|
| 153 |
-
"
|
| 154 |
-
"
|
| 155 |
-
"
|
| 156 |
-
"
|
| 157 |
-
"
|
| 158 |
-
"
|
| 159 |
-
"
|
| 160 |
-
"
|
| 161 |
-
"
|
| 162 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 163 |
},
|
| 164 |
"merges": []
|
| 165 |
}
|
|
|
|
| 150 |
"<mask>": 4,
|
| 151 |
"Ever:": 5,
|
| 152 |
"Small:": 6,
|
| 153 |
+
"▁": 7,
|
| 154 |
+
"a": 8,
|
| 155 |
+
"i": 9,
|
| 156 |
+
"e": 10,
|
| 157 |
+
"n": 11,
|
| 158 |
+
"l": 12,
|
| 159 |
+
"o": 13,
|
| 160 |
+
"r": 14,
|
| 161 |
+
"y": 15,
|
| 162 |
+
"m": 16,
|
| 163 |
+
"t": 17,
|
| 164 |
+
"u": 18,
|
| 165 |
+
"c": 19,
|
| 166 |
+
"s": 20,
|
| 167 |
+
"p": 21,
|
| 168 |
+
"k": 22,
|
| 169 |
+
"d": 23,
|
| 170 |
+
"j": 24,
|
| 171 |
+
"z": 25,
|
| 172 |
+
"g": 26,
|
| 173 |
+
"b": 27,
|
| 174 |
+
"h": 28,
|
| 175 |
+
"v": 29,
|
| 176 |
+
"A": 30,
|
| 177 |
+
"f": 31,
|
| 178 |
+
"q": 32,
|
| 179 |
+
"x": 33,
|
| 180 |
+
"I": 34,
|
| 181 |
+
"Y": 35,
|
| 182 |
+
"E": 36,
|
| 183 |
+
"D": 37,
|
| 184 |
+
"L": 38,
|
| 185 |
+
"M": 39,
|
| 186 |
+
"N": 40,
|
| 187 |
+
"O": 41,
|
| 188 |
+
"w": 42,
|
| 189 |
+
"F": 43,
|
| 190 |
+
"P": 44,
|
| 191 |
+
"U": 45,
|
| 192 |
+
"W": 46,
|
| 193 |
+
"X": 47,
|
| 194 |
+
"B": 48,
|
| 195 |
+
"G": 49,
|
| 196 |
+
"J": 50,
|
| 197 |
+
"Q": 51,
|
| 198 |
+
"S": 52,
|
| 199 |
+
"T": 53
|
| 200 |
},
|
| 201 |
"merges": []
|
| 202 |
}
|