File size: 2,238 Bytes
21baa2f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102

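'''
Token type names and character classification lists (white space, digits,
separators and ambiguous punctuation).

Created on 10/03/2010

@Created by: Muhammad Altabba
'''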

# Lookup table from a token-type code (None or 0-4) to its descriptive name.
TokenTypeDict = {
    None: "Unspecified",
    0: "Text",
    1: "Numbers",
    2: "White Space",
    3: "Punctuation",
    4: "Unknown Character",
}

# White-space characters: space, tab, line feed, carriage return.
WhiteSpacesList = [" ", "\t", "\n", "\r"]


# Digit list: the ten ASCII digit characters, "1" through "9" followed by "0".
# Despite the name this is a list of one-character strings, not a predicate.
isDigit = list("1234567890")

# Separator list: characters listed as separators, in their original order.
# Despite the name this is a list of one-character strings, not a predicate.
isSep = [
    " ",
    "\n",
    "\r",  # added by Muhammad
    "\t",
    "_",
    ";",   # added by Muhammad
    "؛",   # added by Muhammad
    "!",   # added by Muhammad
    "?",   # added by Muhammad
    "؟",   # added by Muhammad
    "\0",  # used as End_Of_File
]

# Ambiguous-character lists. Each is a list of one-character strings.

# List A for Ambiguous.
isAmbiguousA = ["."]

# List B for Ambiguous.
isAmbiguousB = ["/", ",", "،", ":"]

# List C for Ambiguous.
# "!" and "?" were removed from this list by Muhammad.
isAmbiguousC = [
    "\'",
    "\"",
    "[",
    "]",
    "{",
    "}",
    "(",
    ")",
    "«",  # added by Muhammad
    "»",  # added by Muhammad
    "<",  # added by Muhammad
    ">",  # added by Muhammad
    "$",
]

# List D for Math operator Ambiguous.
# NOTE(review): "-" appears twice in this list; both entries are kept so the
# list's contents are unchanged. Confirm whether the second was intended to be
# a different character.
isAmbiguousD = ["+", "-", "*", "&", "^", "%", "#", "@", "-", "~", "="]