Update README.md
Browse files
README.md
CHANGED
|
@@ -31,7 +31,7 @@ def gen_text(batch_size: int = 5000):
|
|
| 31 |
|
| 32 |
tokenizer.train_from_iterator(
|
| 33 |
gen_text(),
|
| 34 |
-
vocab_size=50265,
|
| 35 |
min_frequency=2,
|
| 36 |
special_tokens=[
|
| 37 |
"<s>",
|
|
|
|
| 31 |
|
| 32 |
tokenizer.train_from_iterator(
|
| 33 |
gen_text(),
|
| 34 |
+
vocab_size=50265, # roberta-base와 같은 크기
|
| 35 |
min_frequency=2,
|
| 36 |
special_tokens=[
|
| 37 |
"<s>",
|