| # Arabic Rule Set | |
| # Written by: Emily | |
| # Last Updated: 2019-12-04 | |
| type,sfrom,sto,weight,precede,follow,comment | |
| # Class Rules | |
| ## A class row gives a name (second column) to a set of symbols (third column); later rules refer to a class by writing its name in curly braces. | |
| ## consonant / shortV-output / hamza-combo-output / diphthong-combo list IPA output symbols and are used by the ipasub rules. | |
| ## shortV / diacritic / hamza-combo list Arabic input characters and are used as context in the sub rules. | |
| class,consonant,(b|t|θ|dʒ|ħ|x|d|ð|r|z|s|ʃ|sˤ|dˤ|tˤ|ðˤ|ʕ|ɣ|f|q|k|l|m|n|h|w|j|ʔ),,,, | |
| class,shortV,[ ُِ َ],,,, | |
| class,shortV-output,[aiu],,,, | |
| class,diacritic,[ ْٕٔ ًٍّٓ ٌ ِٰ َُ],,,, | |
| class,hamza-combo,[اوي],,,, | |
| class,hamza-combo-output,(j|w|a ː),,,, | |
| class,diphthong-combo,[jw],,,, | |
| ## Graphemes | |
| ## One class per Arabic letter. The class name is the handle used by the matching row in the Sub Rules / Graphemes section. | |
| ## The emph- classes are the letters whose sub rules output a symbol carrying the ˤ mark. | |
| ## w-uu and j-ii are named for their two uses in the sub rules: plain w / j or lengthening of a preceding u / i. | |
| class,alif,[ا],,,, | |
| class,b,[ب],,,, | |
| class,t,[ت],,,, | |
| class,θ,[ث],,,, | |
| class,dʒ,[ج],,,, | |
| class,ħ,[ح],,,, | |
| class,x,[خ],,,, | |
| class,d,[د],,,, | |
| class,ð,[ذ],,,, | |
| class,r,[ر],,,, | |
| class,z,[ز],,,, | |
| class,s,[س],,,, | |
| class,ʃ,[ش],,,, | |
| class,emph-s,[ص],,,, | |
| class,emph-d,[ض],,,, | |
| class,emph-t,[ط],,,, | |
| class,emph-ð,[ظ],,,, | |
| class,ʕ,[ع],,,, | |
| class,ɣ,[غ],,,, | |
| class,f,[ف],,,, | |
| class,q,[ق],,,, | |
| class,k,[ك],,,, | |
| class,l,[ل],,,, | |
| class,m,[م],,,, | |
| class,n,[ن],,,, | |
| class,h,[ه],,,, | |
| class,w-uu,[و],,,, | |
| class,j-ii,[ي],,,, | |
| class,hamza,[ء],,,, | |
| class,ta-marbuta,[ة],,,, | |
| class,alif-maq,[ى],,,, | |
| ## Diacritics | |
| ## One class per combining mark. The class name is the handle used by the matching row in the Sub Rules / Diacritics section. | |
| class,u,[ُ],,,, | |
| class,a,[َ],,,, | |
| class,i,[ِ],,,, | |
| class,sup-alif,[ٰ],,,, | |
| class,dia-hamza-above,[ٔ],,,, | |
| class,dia-hamza-below,[ٕ],,,, | |
| class,madda,[ٓ],,,, | |
| class,shadda,[ّ],,,, | |
| class,sukun,[ْ],,,, | |
| ## Nunation marks: a-nunnation is fathatan (U+064B) - i-nunnation is kasratan (U+064D) - u-nunnation is dammatan (U+064C). | |
| ## The sub rules output a n / i n / u n for these classes so each name must be bound to the mark with the matching vowel. | |
| class,a-nunnation,[ً],,,, | |
| class,i-nunnation,[ٍ],,,, | |
| class,u-nunnation,[ٌ],,,, | |
| ## Permanent Diacritic Grapheme Combos (some diacritics are not treated as separate from the grapheme) | |
| ## Each class holds one single-character letter-plus-mark form; their sub rules are in the Sub Rules section of the same name. | |
| class,alif-hamza-above,[أ],,,, | |
| class,alif-hamza-below,[إ],,,, | |
| class,w-hamza,[ؤ],,,, | |
| class,j-hamza,[ئ],,,, | |
| class,alif-madda,[آ],,,, | |
| # Sub Rules | |
| ## Columns follow the header row: type - sfrom (input class) - sto (output) - weight - precede (left context) - follow (right context) - comment. | |
| ## In the context columns ^ marks the word start and $ marks the word end (see the row comments that say word-initial / word-finally). | |
| ## NOTE(review): rows with a narrower context carry a larger weight than the default row for the same class - presumably the larger weight wins; confirm against the rule engine. | |
| ## Graphemes | |
| sub,{alif},a ː,2,,, | |
| sub,{alif},ʔ,3,^,,"word-initial alif is /ʔ/, however, not every word-initial alif is realized as such (primarily the hamza is used) [@Coulmas2003, p. 123] - somewhat compromises the transcription", | |
| sub,{b},b,2,,, | |
| sub,{t},t,2,,, | |
| sub,{θ},θ,2,,, | |
| sub,{dʒ},dʒ,2,,, | |
| sub,{ħ},ħ,2,,, | |
| sub,{x},x,2,,, | |
| sub,{d},d,2,,, | |
| sub,{ð},ð,2,,, | |
| sub,{r},r,2,,, | |
| sub,{z},z,2,,, | |
| sub,{s},s,2,,, | |
| sub,{ʃ},ʃ,2,,, | |
| sub,{emph-s},sˤ,2,,, | |
| sub,{emph-d},dˤ,2,,, | |
| sub,{emph-t},tˤ,2,,, | |
| sub,{emph-ð},ðˤ,2,,, | |
| sub,{ʕ},ʕ,2,,, | |
| sub,{ɣ},ɣ,2,,, | |
| sub,{f},f,2,,, | |
| sub,{q},q,2,,, | |
| sub,{k},k,2,,, | |
| sub,{l},l,2,,, | |
| sub,{m},m,2,,, | |
| sub,{n},n,2,,, | |
| sub,{h},h,2,,, | |
| sub,{w-uu},w,2,,, | |
| sub,{j-ii},j,2,,, | |
| sub,{hamza},ʔ,2,,, | |
| ## ta-marbuta: t before a word-final diacritic (which is then deleted) - otherwise empty output at the word end. | |
| sub,{ta-marbuta},t,3,,{diacritic}$,"ta-marbuta occurs word-finally and if followed by a diacritic is recognized as /t/", | |
| sub,{diacritic},,3,{ta-marbuta},$,"clean-up", | |
| sub,{ta-marbuta},,2,,$,"ta-marbuta transcribes to nothing when not followed by a diacritic", | |
| sub,{alif-maq},a,2,,$,"alif-maq occurs word-finally", | |
| ## Diacritics | |
| ## Outputs 1ʔ (hamza above) and 1ː (shadda) are placeholders that the ipasub rules rewrite later - see the row comments. | |
| ## Rows with an empty sto column delete the matched character (sukun and the rows commented clean-up). | |
| sub,{u},u,2,,, | |
| sub,{a},a,2,,, | |
| sub,{i},i,2,,, | |
| sub,{sup-alif},a ː,2,,, | |
| sub,{dia-hamza-above},1ʔ,6,{hamza-combo},,"this transcribes the hamza sequences as the consonant followed by a glottal stop, but we need an ipasub rule to make the glottal stop precede the consonant", | |
| sub,{alif},,4,^,{dia-hamza-above},"alif-hamza (above) only corresponds to glottal stop word-initially, so the alif shouldn't be transcribed", | |
| sub,{alif},ʔ i,4,,{dia-hamza-below}, | |
| sub,{alif},ʔ,5,^,{dia-hamza-below},"alif-hamza (below) only corresponds to glottal stop word-initially, so the alif shouldn't be transcribed", | |
| sub,{dia-hamza-below},,2,{alif},,"clean-up", | |
| sub,{alif},ʔ a ː,3,,{madda}, | |
| sub,{shadda},1ː,2,,,"needed for ipasub gemination rules below", | |
| sub,{sukun},,2,,, | |
| ## Nunation: each class has one row for the bare word-final mark and one for the mark followed by a further word-final diacritic. | |
| sub,{a-nunnation},a n,3,,$, | |
| sub,{a-nunnation},a n,3,,{diacritic}$,"sometimes followed by a diacritic (shadda) which both apply to the consonant [@Habash2010, p. 11]", | |
| sub,{i-nunnation},i n,3,,$, | |
| sub,{i-nunnation},i n,3,,{diacritic}$,"sometimes followed by a diacritic (shadda) which both apply to the consonant (ibid.)", | |
| sub,{u-nunnation},u n,3,,$, | |
| sub,{u-nunnation},u n,3,,{diacritic}$,"sometimes followed by a diacritic (shadda) which both apply to the consonant (ibid.)", | |
| ## Permanent Diacritic Grapheme Combos | |
| ## Every output starts with ʔ. For the two alif forms the word-initial row (precede ^) outputs the bare ʔ and carries the larger weight. | |
| sub,{alif-hamza-above},ʔ a ː,3,,, | |
| sub,{alif-hamza-above},ʔ,4,^,,"alif-hamza (above) only corresponds to glottal stop word-initially - alif is a place holder", | |
| sub,{alif-hamza-below},ʔ i,3,,, | |
| sub,{alif-hamza-below},ʔ,4,^,,"alif-hamza (below) only corresponds to glottal stop word-initially - alif is a place holder", | |
| sub,{w-hamza},ʔ w,2,,, | |
| sub,{j-hamza},ʔ j,2,,, | |
| sub,{alif-madda},ʔ a ː,2,,, | |
| ## Diphthongs [@Javed, p. 6] | |
| ## Rules come in pairs: the short a takes the whole diphthong as its output when the glide letter follows and the clean-up row deletes that glide letter after a. | |
| sub,{a},aw,3,,{w-uu}, | |
| sub,{w-uu},,3,{a},,"clean-up", | |
| sub,{a},aj,3,,{j-ii}, | |
| sub,{j-ii},,3,{a},,"clean-up", | |
| # Additional Long Vowel Transcriptions | |
| ## Rules come in pairs: the short vowel gets a lengthened output when the lengthening letter follows and the clean-up row deletes that letter after the vowel. | |
| ## The u and i rows output the placeholders 2ː and 3ː - the ipasub rules later turn them into w ː / j ː before a shadda placeholder or into a plain ː. | |
| ## These rows carry larger weights (5 and 4) than the diphthong pairs (3) for the same a + letter contexts. | |
| sub,{alif},,8,^,{shortV},"word-initial vowels are represented by an inflected alif [@Habash2012, p. 712]", | |
| sub,{a},a ː,5,,{alif}, | |
| sub,{alif},,4,{a},,"clean-up", | |
| sub,{a},a ː,5,,{alif-maq}, | |
| sub,{alif-maq},,4,{a},,"clean-up", | |
| sub,{u},u 2ː,5,,{w-uu}, | |
| sub,{w-uu},,4,{u},,"clean-up", | |
| sub,{i},i 3ː,5,,{j-ii}, | |
| sub,{j-ii},,4,{i},,"clean-up", | |
| ## Alif-maqsura | |
| ## With a shadda (directly or after a short vowel) alif-maq outputs j ː and the last row deletes the shadda that follows alif-maq plus short vowel. | |
| sub,{alif-maq},j ː,6,,{shortV}{shadda},"alif-maq changes into a yaa if followed by a diacritic (generally a shadda) [@Habash2010, p. 61]", | |
| sub,{alif-maq},j ː,6,,{shadda}, | |
| sub,{shadda},,6,{alif-maq}{shortV},, | |
| # ipasub Rules | |
| ## ipasub rows rewrite the IPA output of the sub rules: sfrom is a pattern over output symbols and sto is its replacement with backreferences. | |
| ## Consonant Gemination | |
| ## Placeholders resolved here: 1ː comes from shadda - 2ː from u + waw lengthening - 3ː from i + yaa lengthening. | |
| ## NOTE(review): the consonant class is itself a parenthesised alternation. If classes are expanded textually then \2 in the next row may refer to the consonant rather than the short vowel (the row after it uses \3 for its second written group) - confirm against the rule engine. | |
| ipasub,({consonant}) ({shortV-output}) 1ː,\1 ː \2,3,,,"controls for shadda geminating vowels depending on if the double consonant is also followed by a short vowel", | |
| ipasub,({consonant}) a 1ː ({diphthong-combo}),\1 ː a\3,4,,,"controls for shadda geminating vowels depending on if the double consonant is also followed by a diphthong", | |
| ipasub,({consonant}) 1ː,\1 ː,2,,,"this removes the 1 if the consonant is not followed by a short vowel" | |
| ipasub,({shortV-output}) 2ː ({shortV-output}) 1ː,\1 w ː \2,3,,,"controls for shadda taking priority over elongation of vowels (e.g. Dammah + waw) with following short vowel", | |
| ipasub,({shortV-output}) 2ː 1ː,\1 w ː,3,,,"controls for shadda taking priority over elongation of vowels (e.g. dammah + waw)", | |
| ipasub,({shortV-output}) 2ː u n 1ː,\1 w ː u n,4,,,"flips around the nunation and gemination", | |
| ipasub,({shortV-output}) 2ː a n 1ː,\1 w ː a n,4,,,"flips around the nunation and gemination", | |
| ipasub,({shortV-output}) 2ː i n 1ː,\1 w ː i n,4,,,"flips around the nunation and gemination", | |
| ipasub,2ː,ː,2,,,"removes the 2 from the rest of the elongated /u/s", | |
| ipasub,({shortV-output}) 3ː ({shortV-output}) 1ː,\1 j ː \2,3,,,"controls for shadda taking priority over elongation of vowels (e.g. kasrah + yaa) with following short vowel", | |
| ipasub,({shortV-output}) 3ː 1ː,\1 j ː,3,,,"controls for shadda taking priority over elongation of vowels (e.g. kasrah + yaa)", | |
| ipasub,({shortV-output}) 3ː i n 1ː,\1 j ː i n,4,,,"flips around the nunation and gemination", | |
| ipasub,({shortV-output}) 3ː u n 1ː,\1 j ː u n,4,,,"flips around the nunation and gemination", | |
| ipasub,({shortV-output}) 3ː a n 1ː,\1 j ː a n,4,,,"flips around the nunation and gemination", | |
| ipasub,1ː,@,1,,,"rules out illegal combos (shadda appearing with nunnation word-medially over an alif)", | |
| ## The 3ː clean-up row needs empty precede and follow columns (same layout as the 2ː clean-up row) so that the quoted text sits in the comment column. | |
| ipasub,3ː,ː,2,,,"removes the 3 from the rest of the elongated /i/s", | |
| ## Hamza (glottal stop) | |
| ## 1ʔ is the placeholder output by the dia-hamza-above sub rule; the first two rows consume it and the third handles the ʔ a ː output of the single-character alif-hamza-above form. | |
| ipasub,({hamza-combo-output}) 1ʔ,ʔ \1,3,,,"this puts the glottal stop before the character carrying the hamza", | |
| ipasub,a ː 1ʔ ({shortV-output}),ʔ \1,4,,,"word-medial glottal stops are sometimes represented as alif topped with a hamza and a short vowel diacritic, the alif holds no value here", | |
| ipasub,ʔ a ː ({shortV-output}),ʔ \1,2,,,"word-medial glottal stops are sometimes represented as alif topped with a hamza and a short vowel diacritic, the alif holds no value here (this is the same rule as the one above but uses the permanent alf-hamza-above character)", | |