Release v1.1: PMI Phrase Merging & Smart Morphology
Browse files
- tokenization_df_arc.py +6 -0
tokenization_df_arc.py
CHANGED
|
@@ -179,6 +179,12 @@ class DFArcTokenizer(PreTrainedTokenizerFast):
|
|
| 179 |
morphological segmentation, and phrase merging before tokenization.
|
| 180 |
"""
|
| 181 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 182 |
def __init__(
|
| 183 |
self,
|
| 184 |
vocab_file: Optional[str] = None,
|
|
|
|
| 179 |
morphological segmentation, and phrase merging before tokenization.
|
| 180 |
"""
|
| 181 |
|
| 182 |
+
vocab_files_names = {
|
| 183 |
+
"vocab_file": "tokenizer.json",
|
| 184 |
+
"tokenizer_file": "tokenizer.json",
|
| 185 |
+
"phrases_file": "phrase_vocab.json"
|
| 186 |
+
}
|
| 187 |
+
|
| 188 |
def __init__(
|
| 189 |
self,
|
| 190 |
vocab_file: Optional[str] = None,
|