File size: 1,228 Bytes
21baa2f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
'''
Created on 11/03/2010

@Created by: Muhammad Altabba
'''
#from Lexicon.NormalizationRulesDict import SecondNormDict

# First normalization table, used by the normalization phase.
# Each key is a single Arabic character and each value is the text it is
# mapped to (the empty string means the character maps to nothing).
#
# Keys and values are written as chr() code points instead of literal Arabic
# characters so the table does not depend on how the source file is decoded:
# the previous literal for the alef-madda replacement had been corrupted into
# unrelated (Thai) characters by a wrong-encoding round trip.
FirstNormDict = {
    # U+0640 ARABIC TATWEEL (kashida, the elongation stroke) -> ''
    chr(1600): '',
    # U+0622 ALEF WITH MADDA ABOVE -> U+0621 HAMZA followed by U+0627 ALEF
    chr(1570): chr(1569) + chr(1575),
}



# Second normalization table, used by the normalization phase.
# Every key is one Arabic diacritic mark (tashkeel) and every value is the
# empty string. The key combinations in parentheses are the keyboard
# shortcuts recorded in the original notes for typing each mark.
SecondNormDict = dict.fromkeys(
    (
        chr(0x064B),  # fathatan (SHIFT+W)
        chr(0x064C),  # dammatan (SHIFT+R)
        chr(0x064D),  # kasratan (SHIFT+S)
        chr(0x064E),  # fatha    (SHIFT+Q)
        chr(0x064F),  # damma    (SHIFT+E)
        chr(0x0650),  # kasra    (SHIFT+A)
        chr(0x0651),  # shadda   (SHIFT+`)
        chr(0x0652),  # sukun    (SHIFT+X)
    ),
    '',
)
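#SecondNormDict[chr(1574)] = chr(1569)  # yeh with hamza above (U+0626) -> hamza (U+0621)
#SecondNormDict[chr(1572)] = chr(1569)  # waw with hamza above (U+0624) -> hamza (U+0621)
#SecondNormDict[chr(1571)] = chr(1569)  # alef with hamza above (U+0623) -> hamza (U+0621)
#SecondNormDict[chr(1573)] = chr(1569)  # alef with hamza below (U+0625) -> hamza (U+0621)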