File size: 7,546 Bytes
4a08ba7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
# Arabic Rule Set
# Written by: Emily
# Last Updated: 2019-12-04
# Column layout shared by every row below. Class rows reuse the sfrom column for the class name and the sto column for its pattern.
type,sfrom,sto,weight,precede,follow,comment
# Class Rules
# Each class row names a pattern that later rows reference as {name}.
# IPA consonant inventory used by the ipasub gemination rules.
class,consonant,(b|t|θ|dʒ|ħ|x|d|ð|r|z|s|ʃ|sˤ|dˤ|tˤ|ðˤ|ʕ|ɣ|f|q|k|l|m|n|h|w|j|ʔ),,,,
# Short vowel marks in the orthography and their IPA counterparts.
# NOTE(review): the bracket contents of shortV and diacritic appear to contain plain spaces between the combining marks - confirm they are not meant to match a space.
class,shortV,[  ُِ َ],,,,
class,shortV-output,[aiu],,,,
# Any combining mark. Used as right-hand context for ta-marbuta and nunation rules.
class,diacritic,[ ْٕٔ ًٍّٓ ٌ ِٰ َُ],,,,
# Letters that can carry a combining hamza and the IPA they produce on their own.
class,hamza-combo,[اوي],,,,
class,hamza-combo-output,(j|w|a ː),,,,
# Glides that can close a diphthong in the IPA output.
class,diphthong-combo,[jw],,,,
## Graphemes
# One class per base letter. The class name is the IPA value or a descriptive label and the bracket holds the single Arabic letter it stands for.
class,alif,[ا],,,,
class,b,[ب],,,,
class,t,[ت],,,,
class,θ,[ث],,,,
class,dʒ,[ج],,,,
class,ħ,[ح],,,,
class,x,[خ],,,,
class,d,[د],,,,
class,ð,[ذ],,,,
class,r,[ر],,,,
class,z,[ز],,,,
class,s,[س],,,,
class,ʃ,[ش],,,,
# The emph- prefix marks the pharyngealised consonants.
class,emph-s,[ص],,,,
class,emph-d,[ض],,,,
class,emph-t,[ط],,,,
class,emph-ð,[ظ],,,,
class,ʕ,[ع],,,,
class,ɣ,[غ],,,,
class,f,[ف],,,,
class,q,[ق],,,,
class,k,[ك],,,,
class,l,[ل],,,,
class,m,[م],,,,
class,n,[ن],,,,
class,h,[ه],,,,
# waw and yaa serve both as glides and as long vowel carriers in the rules below.
class,w-uu,[و],,,,
class,j-ii,[ي],,,,
class,hamza,[ء],,,,
class,ta-marbuta,[ة],,,,
class,alif-maq,[ى],,,,
## Diacritics
# One class per combining mark. Every row carries the full seven columns declared in the header.
class,u,[ُ],,,,
class,a,[َ],,,,
class,i,[ِ],,,,
class,sup-alif,[ٰ],,,,
class,dia-hamza-above,[ٔ],,,,
class,dia-hamza-below,[ٕ],,,,
class,madda,[ٓ],,,,
class,shadda,[ّ],,,,
class,sukun,[ْ],,,,
# Tanwin marks. Fathatan spells -an and kasratan spells -in and dammatan spells -un so each class name matches the IPA its sub rules emit.
class,a-nunnation,[ً],,,,
class,i-nunnation,[ٍ],,,,
class,u-nunnation,[ٌ],,,,
## Permanent Diacritic Grapheme Combos (some diacritics are not treated as separate from the grapheme)
# Precomposed letters that already include their hamza or madda. Each gets its own class so it can be matched as a single character.
class,alif-hamza-above,[أ],,,,
class,alif-hamza-below,[إ],,,,
class,w-hamza,[ؤ],,,,
class,j-hamza,[ئ],,,,
class,alif-madda,[آ],,,,
# Sub Rules
# Each sub row rewrites the class in sfrom to the IPA string in sto. The precede and follow columns restrict the context where ^ marks a word start and $ a word end.
# NOTE(review): weights presumably let the more specific contextual rows win over the default rows - confirm against the rule engine.
## Graphemes
sub,{alif},a ː,2,,,
sub,{alif},ʔ,3,^,,"word-initial alif is /ʔ/, however, not every word-initial alif is realized as such (primarily the hamza is used) [@Coulmas2003, p. 123] - somewhat compromises the transcription"
sub,{b},b,2,,,
sub,{t},t,2,,,
sub,{θ},θ,2,,,
sub,{dʒ},dʒ,2,,,
sub,{ħ},ħ,2,,,
sub,{x},x,2,,,
sub,{d},d,2,,,
sub,{ð},ð,2,,,
sub,{r},r,2,,,
sub,{z},z,2,,,
sub,{s},s,2,,,
sub,{ʃ},ʃ,2,,,
sub,{emph-s},sˤ,2,,,
sub,{emph-d},dˤ,2,,,
sub,{emph-t},tˤ,2,,,
sub,{emph-ð},ðˤ,2,,,
sub,{ʕ},ʕ,2,,,
sub,{ɣ},ɣ,2,,,
sub,{f},f,2,,,
sub,{q},q,2,,,
sub,{k},k,2,,,
sub,{l},l,2,,,
sub,{m},m,2,,,
sub,{n},n,2,,,
sub,{h},h,2,,,
sub,{w-uu},w,2,,,
sub,{j-ii},j,2,,,
sub,{hamza},ʔ,2,,,
# ta-marbuta is handled by three rows: pronounced /t/ before a final diacritic then that diacritic is dropped and otherwise the letter itself is dropped.
sub,{ta-marbuta},t,3,,{diacritic}$,"ta-marbuta occurs word-finally and if followed by a diacritic is recognized as /t/"
sub,{diacritic},,3,{ta-marbuta},$,"clean-up"
sub,{ta-marbuta},,2,,$,"ta-marbuta transcribes to nothing when not followed by a diacritic"
sub,{alif-maq},a,2,,$,"alif-maq occurs word-finally"
## Diacritics
# Rewrites for the combining marks. Outputs that start with a digit such as 1ʔ and 1ː are placeholders that the ipasub rules later in the file resolve.
sub,{u},u,2,,,
sub,{a},a,2,,,
sub,{i},i,2,,,
sub,{sup-alif},a ː,2,,,
sub,{dia-hamza-above},1ʔ,6,{hamza-combo},,"this transcribes the hamza sequences as the consonant followed by a glottal stop, but we need an ipasub rule to make the glottal stop precede the consonant"
sub,{alif},,4,^,{dia-hamza-above},"alif-hamza (above) only corresponds to glottal stop word-initially, so the alif shouldn't be transcribed"
sub,{alif},ʔ i,4,,{dia-hamza-below},
sub,{alif},ʔ,5,^,{dia-hamza-below},"alif-hamza (below) only corresponds to glottal stop word-initially, so the alif shouldn't be transcribed"
sub,{dia-hamza-below},,2,{alif},,"clean-up"
# NOTE(review): unlike dia-hamza-below there is no clean-up row that deletes the madda mark after this rewrite - confirm how the engine treats the leftover mark.
sub,{alif},ʔ a ː,3,,{madda},
sub,{shadda},1ː,2,,,"needed for ipasub gemination rules below"
sub,{sukun},,2,,,
# Nunation is only rewritten at the end of a word either as the last character or before one trailing diacritic.
sub,{a-nunnation},a n,3,,$,
sub,{a-nunnation},a n,3,,{diacritic}$,"sometimes followed by a diacritic (shadda) which both apply to the consonant [@Habash2010, p. 11]"
sub,{i-nunnation},i n,3,,$,
sub,{i-nunnation},i n,3,,{diacritic}$,"sometimes followed by a diacritic (shadda) which both apply to the consonant (ibid.)"
sub,{u-nunnation},u n,3,,$,
sub,{u-nunnation},u n,3,,{diacritic}$,"sometimes followed by a diacritic (shadda) which both apply to the consonant (ibid.)"
## Permanent Diacritic Grapheme Combos
# Rewrites for the precomposed hamza and madda letters. The alif forms get a shorter output at the start of a word via the ^ rows.
sub,{alif-hamza-above},ʔ a ː,3,,,
sub,{alif-hamza-above},ʔ,4,^,,"alif-hamza (above) only corresponds to glottal stop word-initially - alif is a place holder",
sub,{alif-hamza-below},ʔ i,3,,,
sub,{alif-hamza-below},ʔ,4,^,,"alif-hamza (below) only corresponds to glottal stop word-initially - alif is a place holder",
sub,{w-hamza},ʔ w,2,,,
sub,{j-hamza},ʔ j,2,,,
sub,{alif-madda},ʔ a ː,2,,,
## Diphthongs [@Javed, p. 6]
# Each diphthong takes two rows: the fatha absorbs the following glide into its output and the clean-up row then deletes the glide letter itself.
sub,{a},aw,3,,{w-uu},
sub,{w-uu},,3,{a},,"clean-up",
sub,{a},aj,3,,{j-ii},
sub,{j-ii},,3,{a},,"clean-up",
# Additional Long Vowel Transcriptions
# A short vowel followed by its matching letter becomes a long vowel and the paired clean-up row deletes that letter.
# The 2ː and 3ː outputs tag lengthening that came from waw and yaa so the ipasub rules can tell them apart from shadda gemination.
sub,{alif},,8,^,{shortV},"word-initial vowels are represented by an inflected alif [@Habash2012, p. 712]",
sub,{a},a ː,5,,{alif},
sub,{alif},,4,{a},,"clean-up",
sub,{a},a ː,5,,{alif-maq},
sub,{alif-maq},,4,{a},,"clean-up",
sub,{u},u 2ː,5,,{w-uu},
sub,{w-uu},,4,{u},,"clean-up",
sub,{i},i 3ː,5,,{j-ii},
sub,{j-ii},,4,{i},,"clean-up",
## Alif-maqsura
# Before a shadda the alif-maqsura is written out as a long /j/. The last row deletes the shadda when a short vowel sits between the two.
sub,{alif-maq},j ː,6,,{shortV}{shadda},"alif-maq changes into a yaa if followed by a diacritic (generally a shadda) [@Habash2010, p. 61]",
sub,{alif-maq},j ː,6,,{shadda},
sub,{shadda},,6,{alif-maq}{shortV},,
# ipasub Rules
# These rows rewrite the IPA output of the sub rules. They resolve the numbered placeholders: 1ː from shadda and 2ː from damma plus waw and 3ː from kasra plus yaa.
## Consonant Gemination
# NOTE(review): the diphthong row uses \3 for its second written group which suggests {consonant} contributes a capture group of its own. If so the \2 in the first row repeats the consonant instead of the short vowel - confirm against the rule engine.
ipasub,({consonant}) ({shortV-output}) 1ː,\1 ː \2,3,,,"controls for shadda geminating vowels depending on if the double consonant is also followed by a short vowel",
ipasub,({consonant}) a 1ː ({diphthong-combo}),\1 ː a\3,4,,,"controls for shadda geminating vowels depending on if the double consonant is also followed by a diphthong",
ipasub,({consonant}) 1ː,\1 ː,2,,,"this removes the 1 if the consonant is not followed by a short vowel"
# Shadda over a waw that was read as a long /u/: the waw becomes a geminated glide instead.
ipasub,({shortV-output}) 2ː ({shortV-output}) 1ː,\1 w ː \2,3,,,"controls for shadda taking priority over elongation of vowels (e.g. Dammah + waw) with following short vowel",
ipasub,({shortV-output}) 2ː 1ː,\1 w ː,3,,,"controls for shadda taking priority over elongation of vowels (e.g. dammah + waw)",
ipasub,({shortV-output}) 2ː u n 1ː,\1 w ː u n,4,,,"flips around the nunation and gemination",
ipasub,({shortV-output}) 2ː a n 1ː,\1 w ː a n,4,,,"flips around the nunation and gemination",
ipasub,({shortV-output}) 2ː i n 1ː,\1 w ː i n,4,,,"flips around the nunation and gemination",
ipasub,2ː,ː,2,,,"removes the 2 from the rest of the elongated /u/s",
# Same treatment for shadda over a yaa that was read as a long /i/.
ipasub,({shortV-output}) 3ː ({shortV-output}) 1ː,\1 j ː \2,3,,,"controls for shadda taking priority over elongation of vowels (e.g. kasrah + yaa) with following short vowel",
ipasub,({shortV-output}) 3ː 1ː,\1 j ː,3,,,"controls for shadda taking priority over elongation of vowels (e.g. kasrah + yaa)",
ipasub,({shortV-output}) 3ː i n 1ː,\1 j ː i n,4,,,"flips around the nunation and gemination",
ipasub,({shortV-output}) 3ː u n 1ː,\1 j ː u n,4,,,"flips around the nunation and gemination",
ipasub,({shortV-output}) 3ː a n 1ː,\1 j ː a n,4,,,"flips around the nunation and gemination",
# Any shadda placeholder that none of the rows above consumed is replaced with @.
ipasub,1ː,@,1,,,"rules out illegal combos (shadda appearing with nunnation word-medially over an alif)",
# This row needs empty precede and follow columns so the quoted text lands in the comment column rather than acting as a right-hand context.
ipasub,3ː,ː,2,,,"removes the 3 from the rest of the elongated /i/s"
## Hamza (glottal stop)
# Resolves the 1ʔ placeholder emitted for a combining hamza by moving the glottal stop in front of its carrier or by dropping a carrier alif before a short vowel.
ipasub,({hamza-combo-output}) 1ʔ,ʔ \1,3,,,"this puts the glottal stop before the character carrying the hamza",
ipasub,a ː 1ʔ ({shortV-output}),ʔ \1,4,,,"word-medial glottal stops are sometimes represented as alif topped with a hamza and a short vowel diacritic, the alif holds no value here",
ipasub,ʔ a ː ({shortV-output}),ʔ \1,2,,,"word-medial glottal stops are sometimes represented as alif topped with a hamza and a short vowel diacritic, the alif holds no value here (this is the same rule as the one above but uses the permanent alf-hamza-above character)",