| """ | |
| TurkTokenizer — Turkish morphological tokenizer. | |
| TR-MMLU world record: 92% | |
| Usage: | |
| from turk_tokenizer import TurkTokenizer | |
| tok = TurkTokenizer() | |
| tokens = tok("İstanbul'da meeting'e katılamadım") | |
| # Each item in `tokens` is a dict with the following keys: | |
| # token : str — token string (with leading space if word-initial) | |
| # token_type : str — ROOT | SUFFIX | FOREIGN | BPE | PUNCT | | |
| # NUM | DATE | UNIT | URL | MENTION | HASHTAG | EMOJI | |
| # morph_pos : int — 0=root/word-initial, 1=first suffix, 2=second suffix, and so on | |
| """ | |
| from .tokenizer import TurkTokenizer | |
| __all__ = ["TurkTokenizer"] | |
| __version__ = "1.0.0" | |