File size: 403 Bytes
21baa2f | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 |
'''
Created on ١٠/٠٣/٢٠١٠
@Created by: Muhammad Altabba
'''
# These are most (not all) of the rules for tokenizing the Arabic language.
#
# Module-level list of separator characters, in this order: full stop,
# underscore, newline, carriage return, horizontal tab and the NUL character.
# The (misspelled) name is kept as-is so existing references keep working.
# NOTE(review): presumably consumed by a sentence splitter -- the consumer is
# not visible here; confirm before changing the contents or their order.
SentenceSeperatorsList = [
    ".",   # dot
    "_",
    "\n",
    "\r",
    "\t",
    "\0",
]
|