"""TurkTokenizer — Turkish morphological tokenizer.

TR-MMLU world record: 92%

Usage::

    from turk_tokenizer import TurkTokenizer

    tok = TurkTokenizer()
    tokens = tok("İstanbul'da meeting'e katılamadım")

Each token dict contains the following keys:

    token      : str — token string (with leading space if word-initial)
    token_type : str — ROOT | SUFFIX | FOREIGN | BPE | PUNCT |
                       NUM | DATE | UNIT | URL | MENTION | HASHTAG | EMOJI
    morph_pos  : int — 0=root/word-initial, 1=first suffix, 2=second...
"""

# Package metadata and the explicit public API of the package.
__version__ = "1.0.0"
__all__ = ["TurkTokenizer"]

# Re-export the tokenizer class so it can be imported from the package root.
from .tokenizer import TurkTokenizer