|
|
| ''' |
| Created on ١٠/٠٣/٢٠١٠ |
| |
| @Created by: Muhammad Altabba |
| ''' |
|
|
# Lookup table from a token-type code to its display name.
# Keys are None (no type assigned) and the integer codes 0 through 4.
TokenTypeDict = {
    None: "Unspecified",
    0: "Text",
    1: "Numbers",
    2: "White Space",
    3: "Punctuation",
    4: "Unknown Character",
}
|
|
# Whitespace characters, in order: space, tab, line feed, carriage return.
WhiteSpacesList = [" ", "\t", "\n", "\r"]
|
|
|
|
| |
# Despite the predicate-like name, this is a list of the ASCII digit
# characters (order: 1..9, then 0), not a function.
isDigit = list("1234567890")
|
|
| |
# Despite the predicate-like name, this is a list of separator characters.
# NOTE(review): presumably consulted by a tokenizer elsewhere -- confirm.
isSep = [
    # Whitespace.
    " ",
    "\n",
    "\r",
    "\t",
    # Underscore.
    "_",
    # Semicolon: Latin, then Arabic.
    ";",
    "؛",
    # Exclamation mark.
    "!",
    # Question mark: Latin, then Arabic.
    "?",
    "؟",
    # NUL character.
    "\0",
]
|
|
| |
# Four groups of punctuation/symbol characters labelled "ambiguous".
# Despite the predicate-like names, each is a plain list of characters.
# NOTE(review): what distinguishes groups A-D is not visible in this
# section -- confirm against the code that consumes them.

# Group A: the full stop only.
isAmbiguousA = ["."]

# Group B: slash, Latin comma, Arabic comma, colon.
isAmbiguousB = ["/", ",", "،", ":"]

# Group C: quotes, bracket pairs, guillemets, angle brackets, dollar sign.
isAmbiguousC = [
    "'",
    '"',
    "[",
    "]",
    "{",
    "}",
    "(",
    ")",
    "«",
    "»",
    "<",
    ">",
    "$",
]

# Group D: operator-like and miscellaneous symbols.
# NOTE(review): "-" is listed twice; the duplicate is kept so the list
# contents stay exactly as before -- confirm whether another character
# was intended.
isAmbiguousD = ["+", "-", "*", "&", "^", "%", "#", "@", "-", "~", "="]