# Arabic Rule Set
# Written by: Emily
# Last Updated: 2019-12-04
# Layout: one record per line; the column names are given by the header row below.
type,sfrom,sto,weight,precede,follow,comment
# Class Rules
class,consonant,(b|t|θ|dʒ|ħ|x|d|ð|r|z|s|ʃ|sˤ|dˤ|tˤ|ðˤ|ʕ|ɣ|f|q|k|l|m|n|h|w|j|ʔ),,,,
class,shortV,[ ُِ َ],,,,
class,shortV-output,[aiu],,,,
class,diacritic,[ ْٕٔ ًٍّٓ ٌ ِٰ َُ],,,,
class,hamza-combo,[اوي],,,,
class,hamza-combo-output,(j|w|a ː),,,,
class,diphthong-combo,[jw],,,,
## Graphemes
class,alif,[ا],,,,
class,b,[ب],,,,
class,t,[ت],,,,
class,θ,[ث],,,,
class,dʒ,[ج],,,,
class,ħ,[ح],,,,
class,x,[خ],,,,
class,d,[د],,,,
class,ð,[ذ],,,,
class,r,[ر],,,,
class,z,[ز],,,,
class,s,[س],,,,
class,ʃ,[ش],,,,
class,emph-s,[ص],,,,
class,emph-d,[ض],,,,
class,emph-t,[ط],,,,
class,emph-ð,[ظ],,,,
class,ʕ,[ع],,,,
class,ɣ,[غ],,,,
class,f,[ف],,,,
class,q,[ق],,,,
class,k,[ك],,,,
class,l,[ل],,,,
class,m,[م],,,,
class,n,[ن],,,,
class,h,[ه],,,,
class,w-uu,[و],,,,
class,j-ii,[ي],,,,
class,hamza,[ء],,,,
class,ta-marbuta,[ة],,,,
class,alif-maq,[ى],,,,
## Diacritics
class,u,[ُ],,,,
class,a,[َ],,,,
class,i,[ِ],,,,
class,sup-alif,[ٰ],,,,
class,dia-hamza-above,[ٔ],,,,
class,dia-hamza-below,[ٕ],,,,
class,madda,[ٓ],,,,
class,shadda,[ّ],,,,
class,sukun,[ْ],,,,
# Nunnation marks: a-nunnation is fathatan (U+064B); i-nunnation is kasratan (U+064D); u-nunnation is dammatan (U+064C)
class,a-nunnation,[ً],,,,
class,i-nunnation,[ٍ],,,,
class,u-nunnation,[ٌ],,,,
## Permanent Diacritic Grapheme Combos (some diacritics are not treated as separate from the grapheme)
class,alif-hamza-above,[أ],,,,
class,alif-hamza-below,[إ],,,,
class,w-hamza,[ؤ],,,,
class,j-hamza,[ئ],,,,
class,alif-madda,[آ],,,,
# Sub Rules
## Graphemes
sub,{alif},a ː,2,,,
sub,{alif},ʔ,3,^,,"word-initial alif is /ʔ/, however, not every word-initial alif is realized as such (primarily the hamza is used) [@Coulmas2003, p. 123] - somewhat compromises the transcription",
sub,{b},b,2,,,
sub,{t},t,2,,,
sub,{θ},θ,2,,,
sub,{dʒ},dʒ,2,,,
sub,{ħ},ħ,2,,,
sub,{x},x,2,,,
sub,{d},d,2,,,
sub,{ð},ð,2,,,
sub,{r},r,2,,,
sub,{z},z,2,,,
sub,{s},s,2,,,
sub,{ʃ},ʃ,2,,,
sub,{emph-s},sˤ,2,,,
sub,{emph-d},dˤ,2,,,
sub,{emph-t},tˤ,2,,,
sub,{emph-ð},ðˤ,2,,,
sub,{ʕ},ʕ,2,,,
sub,{ɣ},ɣ,2,,,
sub,{f},f,2,,,
sub,{q},q,2,,,
sub,{k},k,2,,,
sub,{l},l,2,,,
sub,{m},m,2,,,
sub,{n},n,2,,,
sub,{h},h,2,,,
sub,{w-uu},w,2,,,
sub,{j-ii},j,2,,,
sub,{hamza},ʔ,2,,,
sub,{ta-marbuta},t,3,,{diacritic}$,"ta-marbuta occurs word-finally and if followed by a diacritic is recognized as /t/",
sub,{diacritic},,3,{ta-marbuta},$,"clean-up",
sub,{ta-marbuta},,2,,$,"ta-marbuta transcribes to nothing when not followed by a diacritic",
sub,{alif-maq},a,2,,$,"alif-maq occurs word-finally",
## Diacritics
sub,{u},u,2,,,
sub,{a},a,2,,,
sub,{i},i,2,,,
sub,{sup-alif},a ː,2,,,
sub,{dia-hamza-above},1ʔ,6,{hamza-combo},,"this transcribes the hamza sequences as the consonant followed by a glottal stop, but we need an ipasub rule to make the glottal stop precede the consonant",
sub,{alif},,4,^,{dia-hamza-above},"alif-hamza (above) only corresponds to glottal stop word-initially, so the alif shouldn't be transcribed",
sub,{alif},ʔ i,4,,{dia-hamza-below},
sub,{alif},ʔ,5,^,{dia-hamza-below},"alif-hamza (below) only corresponds to glottal stop word-initially, so the alif shouldn't be transcribed",
sub,{dia-hamza-below},,2,{alif},,"clean-up",
sub,{alif},ʔ a ː,3,,{madda},
sub,{shadda},1ː,2,,,"needed for ipasub gemination rules below",
sub,{sukun},,2,,,
sub,{a-nunnation},a n,3,,$,
sub,{a-nunnation},a n,3,,{diacritic}$,"sometimes followed by a diacritic (shadda) which both apply to the consonant [@Habash2010, p. 11]",
sub,{i-nunnation},i n,3,,$,
sub,{i-nunnation},i n,3,,{diacritic}$,"sometimes followed by a diacritic (shadda) which both apply to the consonant (ibid.)",
sub,{u-nunnation},u n,3,,$,
sub,{u-nunnation},u n,3,,{diacritic}$,"sometimes followed by a diacritic (shadda) which both apply to the consonant (ibid.)",
## Permanent Diacritic Grapheme Combos
sub,{alif-hamza-above},ʔ a ː,3,,,
sub,{alif-hamza-above},ʔ,4,^,,"alif-hamza (above) only corresponds to glottal stop word-initially - alif is a place holder",
sub,{alif-hamza-below},ʔ i,3,,,
sub,{alif-hamza-below},ʔ,4,^,,"alif-hamza (below) only corresponds to glottal stop word-initially - alif is a place holder",
sub,{w-hamza},ʔ w,2,,,
sub,{j-hamza},ʔ j,2,,,
sub,{alif-madda},ʔ a ː,2,,,
## Diphthongs [@Javed, p. 6]
sub,{a},aw,3,,{w-uu},
sub,{w-uu},,3,{a},,"clean-up",
sub,{a},aj,3,,{j-ii},
sub,{j-ii},,3,{a},,"clean-up",
# Additional Long Vowel Transcriptions
sub,{alif},,8,^,{shortV},"word-initial vowels are represented by an inflected alif [@Habash2012, p. 712]",
sub,{a},a ː,5,,{alif},
sub,{alif},,4,{a},,"clean-up",
sub,{a},a ː,5,,{alif-maq},
sub,{alif-maq},,4,{a},,"clean-up",
sub,{u},u 2ː,5,,{w-uu},
sub,{w-uu},,4,{u},,"clean-up",
sub,{i},i 3ː,5,,{j-ii},
sub,{j-ii},,4,{i},,"clean-up",
## Alif-maqsura
sub,{alif-maq},j ː,6,,{shortV}{shadda},"alif-maq changes into a yaa if followed by a diacritic (generally a shadda) [@Habash2010, p. 61]",
sub,{alif-maq},j ː,6,,{shadda},
sub,{shadda},,6,{alif-maq}{shortV},,
# ipasub Rules
## Consonant Gemination
# NOTE(review): the next two rules number their backreferences differently (\2 vs \3 after one leading group) - confirm which one matches the class expansion done by the rule engine
ipasub,({consonant}) ({shortV-output}) 1ː,\1 ː \2,3,,,"controls for shadda geminating vowels depending on if the double consonant is also followed by a short vowel",
ipasub,({consonant}) a 1ː ({diphthong-combo}),\1 ː a\3,4,,,"controls for shadda geminating vowels depending on if the double consonant is also followed by a diphthong",
ipasub,({consonant}) 1ː,\1 ː,2,,,"this removes the 1 if the consonant is not followed by a short vowel",
ipasub,({shortV-output}) 2ː ({shortV-output}) 1ː,\1 w ː \2,3,,,"controls for shadda taking priority over elongation of vowels (e.g. dammah + waw) with following short vowel",
ipasub,({shortV-output}) 2ː 1ː,\1 w ː,3,,,"controls for shadda taking priority over elongation of vowels (e.g. dammah + waw)",
ipasub,({shortV-output}) 2ː u n 1ː,\1 w ː u n,4,,,"flips around the nunnation and gemination",
ipasub,({shortV-output}) 2ː a n 1ː,\1 w ː a n,4,,,"flips around the nunnation and gemination",
ipasub,({shortV-output}) 2ː i n 1ː,\1 w ː i n,4,,,"flips around the nunnation and gemination",
ipasub,2ː,ː,2,,,"removes the 2 from the rest of the elongated /u/s",
ipasub,({shortV-output}) 3ː ({shortV-output}) 1ː,\1 j ː \2,3,,,"controls for shadda taking priority over elongation of vowels (e.g. kasrah + yaa) with following short vowel",
ipasub,({shortV-output}) 3ː 1ː,\1 j ː,3,,,"controls for shadda taking priority over elongation of vowels (e.g. kasrah + yaa)",
ipasub,({shortV-output}) 3ː i n 1ː,\1 j ː i n,4,,,"flips around the nunnation and gemination",
ipasub,({shortV-output}) 3ː u n 1ː,\1 j ː u n,4,,,"flips around the nunnation and gemination",
ipasub,({shortV-output}) 3ː a n 1ː,\1 j ː a n,4,,,"flips around the nunnation and gemination",
ipasub,1ː,@,1,,,"rules out illegal combos (shadda appearing with nunnation word-medially over an alif)",
ipasub,3ː,ː,2,,,"removes the 3 from the rest of the elongated /i/s",
## Hamza (glottal stop)
ipasub,({hamza-combo-output}) 1ʔ,ʔ \1,3,,,"this puts the glottal stop before the character carrying the hamza",
ipasub,a ː 1ʔ ({shortV-output}),ʔ \1,4,,,"word-medial glottal stops are sometimes represented as alif topped with a hamza and a short vowel diacritic, the alif holds no value here",
ipasub,ʔ a ː ({shortV-output}),ʔ \1,2,,,"word-medial glottal stops are sometimes represented as alif topped with a hamza and a short vowel diacritic, the alif holds no value here (this is the same rule as the one above but uses the permanent alif-hamza-above character)",