Upload tokenizer_hf.py with huggingface_hub
Browse files
- tokenizer_hf.py +1 -1
tokenizer_hf.py
CHANGED
|
@@ -86,7 +86,7 @@ class PaniniTokenizerHF(PreTrainedTokenizer):
|
|
| 86 |
|
| 87 |
if self._splitter:
|
| 88 |
# Use morphological splitting
|
| 89 |
-
split_result = self._splitter.
|
| 90 |
if split_result.is_compound and len(split_result.components) > 1:
|
| 91 |
for j, comp in enumerate(split_result.components):
|
| 92 |
if j == 0:
|
|
|
|
| 86 |
|
| 87 |
if self._splitter:
|
| 88 |
# Use morphological splitting
|
| 89 |
+
split_result = self._splitter.split_v4(word) # V1.5: Sandhi expansion
|
| 90 |
if split_result.is_compound and len(split_result.components) > 1:
|
| 91 |
for j, comp in enumerate(split_result.components):
|
| 92 |
if j == 0:
|