'''
Created on 10/03/2010

@Created by: Muhammad Altabba
'''

# These are most (not all) of the rules for tokenizing the Arabic language.

# Characters collected as sentence separators (per the variable name).
# The code that consumes this list is not visible here, so the order and
# contents are kept exactly as originally appended.
# NOTE(review): the name keeps its original spelling ("Seperators") because
# other modules may reference it.
SentenceSeperatorsList = [
    ".",   # dot
    "_",   # underscore
    "\n",  # line feed
    "\r",  # carriage return
    "\t",  # horizontal tab
    "\0",  # NUL character
]