ArthaLabs committed on
Commit
cbe5930
·
verified ·
1 Parent(s): 77111fb

Upload tokenizer_hf.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. tokenizer_hf.py +1 -1
tokenizer_hf.py CHANGED
@@ -86,7 +86,7 @@ class PaniniTokenizerHF(PreTrainedTokenizer):
86
 
87
  if self._splitter:
88
  # Use morphological splitting
89
- split_result = self._splitter.split(word)
90
  if split_result.is_compound and len(split_result.components) > 1:
91
  for j, comp in enumerate(split_result.components):
92
  if j == 0:
 
86
 
87
  if self._splitter:
88
  # Use morphological splitting
89
+ split_result = self._splitter.split_v4(word) # V1.5: Sandhi expansion
90
  if split_result.is_compound and len(split_result.components) > 1:
91
  for j, comp in enumerate(split_result.components):
92
  if j == 0: