Arabic
arabic
tokenizer
morphology
nlp
dialect
fr3on committed on
Commit
37aebb1
·
verified ·
1 Parent(s): 8941194

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. tokenization_df_arc.py +0 -5
tokenization_df_arc.py CHANGED
@@ -213,11 +213,6 @@ class DFArcTokenizer(PreTrainedTokenizerFast):
213
  text = self.phrase_helper.merge_phrases(text)
214
  return super().encode(text, *args, **kwargs)
215
 
216
- def _encode_plus(self, text, *args, **kwargs):
217
- if isinstance(text, str):
218
- text = self.normalizer_helper.normalize(text)
219
- text = self.morph_helper.segment_text(text)
220
- text = self.phrase_helper.merge_phrases(text)
221
  def convert_tokens_to_string(self, tokens: List[str]) -> str:
222
  """
223
  Converts a sequence of tokens (string) in a single string.
 
213
  text = self.phrase_helper.merge_phrases(text)
214
  return super().encode(text, *args, **kwargs)
215
 
 
 
 
 
 
216
  def convert_tokens_to_string(self, tokens: List[str]) -> str:
217
  """
218
  Converts a sequence of tokens (string) in a single string.