Arabic
arabic
tokenizer
morphology
nlp
dialect
fr3on committed on
Commit
4730d8d
·
verified ·
1 Parent(s): acdecdf

Release v1.1: PMI Phrase Merging & Smart Morphology

Browse files
Files changed (1) hide show
  1. tokenization_df_arc.py +6 -0
tokenization_df_arc.py CHANGED
@@ -179,6 +179,12 @@ class DFArcTokenizer(PreTrainedTokenizerFast):
179
  morphological segmentation, and phrase merging before tokenization.
180
  """
181
 
 
 
 
 
 
 
182
  def __init__(
183
  self,
184
  vocab_file: Optional[str] = None,
 
179
  morphological segmentation, and phrase merging before tokenization.
180
  """
181
 
182
+ vocab_files_names = {
183
+ "vocab_file": "tokenizer.json",
184
+ "tokenizer_file": "tokenizer.json",
185
+ "phrases_file": "phrase_vocab.json"
186
+ }
187
+
188
  def __init__(
189
  self,
190
  vocab_file: Optional[str] = None,