niobures's picture
XPF
4a08ba7 verified
# Arabic Rule Set
# Written by: Emily
# Last Updated: 2019-12-04
type,sfrom,sto,weight,precede,follow,comment
# Class Rules
# Each class row names a reusable pattern that later rules reference as {name}.
# consonant, shortV-output, hamza-combo-output and diphthong-combo list IPA output symbols and are referenced by the ipasub rules.
# shortV, diacritic and hamza-combo list Arabic-script characters and are referenced by the sub rules.
# consonant and hamza-combo-output are parenthesised alternations (not bracket sets) because some members are longer than one character.
class,consonant,(b|t|θ|dʒ|ħ|x|d|ð|r|z|s|ʃ|sˤ|dˤ|tˤ|ðˤ|ʕ|ɣ|f|q|k|l|m|n|h|w|j|ʔ),,,,
class,shortV,[ ُِ َ],,,,
class,shortV-output,[aiu],,,,
class,diacritic,[ ْٕٔ ًٍّٓ ٌ ِٰ َُ],,,,
class,hamza-combo,[اوي],,,,
class,hamza-combo-output,(j|w|a ː),,,,
class,diphthong-combo,[jw],,,,
## Graphemes
# One class per Arabic letter. Classes for plain consonant letters are named after the IPA symbol
# their sub rule emits; the remaining names (alif, emph-*, w-uu, j-ii, hamza, ta-marbuta, alif-maq) are descriptive labels.
class,alif,[ا],,,,
class,b,[ب],,,,
class,t,[ت],,,,
class,θ,[ث],,,,
class,dʒ,[ج],,,,
class,ħ,[ح],,,,
class,x,[خ],,,,
class,d,[د],,,,
class,ð,[ذ],,,,
class,r,[ر],,,,
class,z,[ز],,,,
class,s,[س],,,,
class,ʃ,[ش],,,,
class,emph-s,[ص],,,,
class,emph-d,[ض],,,,
class,emph-t,[ط],,,,
class,emph-ð,[ظ],,,,
class,ʕ,[ع],,,,
class,ɣ,[غ],,,,
class,f,[ف],,,,
class,q,[ق],,,,
class,k,[ك],,,,
class,l,[ل],,,,
class,m,[م],,,,
class,n,[ن],,,,
class,h,[ه],,,,
class,w-uu,[و],,,,
class,j-ii,[ي],,,,
class,hamza,[ء],,,,
class,ta-marbuta,[ة],,,,
class,alif-maq,[ى],,,,
## Diacritics
# One class per combining mark. Every row carries the same seven columns as the header row.
class,u,[ُ],,,,
class,a,[َ],,,,
class,i,[ِ],,,,
class,sup-alif,[ٰ],,,,
class,dia-hamza-above,[ٔ],,,,
class,dia-hamza-below,[ٕ],,,,
class,madda,[ٓ],,,,
class,shadda,[ّ],,,,
class,sukun,[ْ],,,,
# Nunation marks: fathatan (U+064B) is read "an", kasratan (U+064D) "in", dammatan (U+064C) "un".
# Each class is named after the vowel that its sub rule emits, so a-nunnation must hold fathatan
# and u-nunnation must hold dammatan.
class,a-nunnation,[ً],,,,
class,i-nunnation,[ٍ],,,,
class,u-nunnation,[ٌ],,,,
## Permanent Diacritic Grapheme Combos (some diacritics are not treated as separate from the grapheme)
# Each class below holds a single precomposed letter. The decomposed spellings (base letter followed by a
# combining hamza or madda) are covered separately through the dia-hamza-above, dia-hamza-below and madda classes.
class,alif-hamza-above,[أ],,,,
class,alif-hamza-below,[إ],,,,
class,w-hamza,[ؤ],,,,
class,j-hamza,[ئ],,,,
class,alif-madda,[آ],,,,
# Sub Rules
# Column order follows the header row: type, sfrom, sto, weight, precede, follow, comment.
# In the precede/follow columns, ^ marks the word start and $ the word end (see the "word-initial" / "word-finally" comments below).
# NOTE(review): context-specific rules carry a higher weight than the plain rule for the same grapheme,
# so weight presumably decides which rule wins — confirm against the translator.
## Graphemes
sub,{alif},a ː,2,,,
sub,{alif},ʔ,3,^,,"word-initial alif is /ʔ/, however, not every word-initial alif is realized as such (primarily the hamza is used) [@Coulmas2003, p. 123] - somewhat compromises the transcription",
sub,{b},b,2,,,
sub,{t},t,2,,,
sub,{θ},θ,2,,,
sub,{dʒ},dʒ,2,,,
sub,{ħ},ħ,2,,,
sub,{x},x,2,,,
sub,{d},d,2,,,
sub,{ð},ð,2,,,
sub,{r},r,2,,,
sub,{z},z,2,,,
sub,{s},s,2,,,
sub,{ʃ},ʃ,2,,,
sub,{emph-s},sˤ,2,,,
sub,{emph-d},dˤ,2,,,
sub,{emph-t},tˤ,2,,,
sub,{emph-ð},ðˤ,2,,,
sub,{ʕ},ʕ,2,,,
sub,{ɣ},ɣ,2,,,
sub,{f},f,2,,,
sub,{q},q,2,,,
sub,{k},k,2,,,
sub,{l},l,2,,,
sub,{m},m,2,,,
sub,{n},n,2,,,
sub,{h},h,2,,,
sub,{w-uu},w,2,,,
sub,{j-ii},j,2,,,
sub,{hamza},ʔ,2,,,
# ta-marbuta: /t/ when a word-final diacritic follows (that diacritic is then emptied), otherwise silent.
sub,{ta-marbuta},t,3,,{diacritic}$,"ta-marbuta occurs word-finally and if followed by a diacritic is recognized as /t/",
sub,{diacritic},,3,{ta-marbuta},$,"clean-up",
sub,{ta-marbuta},,2,,$,"ta-marbuta transcribes to nothing when not followed by a diacritic",
sub,{alif-maq},a,2,,$,"alif-maq occurs word-finally",
## Diacritics
sub,{u},u,2,,,
sub,{a},a,2,,,
sub,{i},i,2,,,
sub,{sup-alif},a ː,2,,,
# 1ʔ is a placeholder: the digit tags the glottal stop so the ipasub hamza rules can find and reposition it.
sub,{dia-hamza-above},1ʔ,6,{hamza-combo},,"this transcribes the hamza sequences as the consonant followed by a glottal stop, but we need an ipasub rule to make the glottal stop precede the consonant",
sub,{alif},,4,^,{dia-hamza-above},"alif-hamza (above) only corresponds to glottal stop word-initially, so the alif shouldn't be transcribed",
sub,{alif},ʔ i,4,,{dia-hamza-below},
sub,{alif},ʔ,5,^,{dia-hamza-below},"alif-hamza (below) only corresponds to glottal stop word-initially, so the alif shouldn't be transcribed",
sub,{dia-hamza-below},,2,{alif},,"clean-up",
sub,{alif},ʔ a ː,3,,{madda},
# The alif before a combining madda already yields the full "ʔ a ː", so the madda itself is emptied,
# mirroring the dia-hamza-below clean-up; without this rule the mark has no transcription at all.
sub,{madda},,2,{alif},,"clean-up",
sub,{shadda},1ː,2,,,"needed for ipasub gemination rules below",
sub,{sukun},,2,,,
# Nunation: each mark is transcribed as its vowel plus n, in two contexts — directly at the word end,
# or when exactly one more diacritic stands between the mark and the word end.
sub,{a-nunnation},a n,3,,$,
sub,{a-nunnation},a n,3,,{diacritic}$,"sometimes followed by a diacritic (shadda) which both apply to the consonant [@Habash2010, p. 11]",
sub,{i-nunnation},i n,3,,$,
sub,{i-nunnation},i n,3,,{diacritic}$,"sometimes followed by a diacritic (shadda) which both apply to the consonant (ibid.)",
sub,{u-nunnation},u n,3,,$,
sub,{u-nunnation},u n,3,,{diacritic}$,"sometimes followed by a diacritic (shadda) which both apply to the consonant (ibid.)",
## Permanent Diacritic Grapheme Combos
# For the two alif-hamza letters, the word-initial rule (precede = ^) has the higher weight and emits only the glottal stop;
# elsewhere the glottal stop is followed by a vowel. The remaining letters have a single context-free rule each.
sub,{alif-hamza-above},ʔ a ː,3,,,
sub,{alif-hamza-above},ʔ,4,^,,"alif-hamza (above) only corresponds to glottal stop word-initially - alif is a place holder",
sub,{alif-hamza-below},ʔ i,3,,,
sub,{alif-hamza-below},ʔ,4,^,,"alif-hamza (below) only corresponds to glottal stop word-initially - alif is a place holder",
sub,{w-hamza},ʔ w,2,,,
sub,{j-hamza},ʔ j,2,,,
sub,{alif-madda},ʔ a ː,2,,,
## Diphthongs [@Javed, p. 6]
# The fatha is rewritten to the whole diphthong (aw / aj); the waw or yaa that follows it is then emptied
# by the paired clean-up rule so the glide is not transcribed a second time.
sub,{a},aw,3,,{w-uu},
sub,{w-uu},,3,{a},,"clean-up",
sub,{a},aj,3,,{j-ii},
sub,{j-ii},,3,{a},,"clean-up",
# Additional Long Vowel Transcriptions
# A short-vowel mark followed by its matching letter becomes a long vowel, and the letter is emptied by the paired clean-up rule.
# The 2ː (from waw) and 3ː (from yaa) markers tag where the length came from, so the ipasub rules can
# still turn it back into a geminated w / j when a shadda follows.
sub,{alif},,8,^,{shortV},"word-initial vowels are represented by an inflected alif [@Habash2012, p. 712]",
sub,{a},a ː,5,,{alif},
sub,{alif},,4,{a},,"clean-up",
sub,{a},a ː,5,,{alif-maq},
sub,{alif-maq},,4,{a},,"clean-up",
sub,{u},u 2ː,5,,{w-uu},
sub,{w-uu},,4,{u},,"clean-up",
sub,{i},i 3ː,5,,{j-ii},
sub,{j-ii},,4,{i},,"clean-up",
## Alif-maqsura
# Before a shadda (optionally with a short vowel in between) alif-maq is written out directly as long j;
# the last rule empties the shadda in the short-vowel case.
sub,{alif-maq},j ː,6,,{shortV}{shadda},"alif-maq changes into a yaa if followed by a diacritic (generally a shadda) [@Habash2010, p. 61]",
sub,{alif-maq},j ː,6,,{shadda},
sub,{shadda},,6,{alif-maq}{shortV},,
# ipasub Rules
# These rules rewrite the IPA string produced by the sub rules; sfrom is a regular expression and sto its replacement.
## Consonant Gemination
# 1ː is the placeholder emitted for a shadda. It is moved so that the length mark attaches to the consonant
# rather than to the vowel written between the consonant and the shadda.
# Group numbering: {consonant} is itself a parenthesised alternation, so ({consonant}) opens groups 1 and 2
# (both hold the consonant) and the next explicit group is number 3.
ipasub,({consonant}) ({shortV-output}) 1ː,\1 ː \3,3,,,"controls for shadda geminating vowels depending on if the double consonant is also followed by a short vowel",
ipasub,({consonant}) a 1ː ({diphthong-combo}),\1 ː a\3,4,,,"controls for shadda geminating vowels depending on if the double consonant is also followed by a diphthong",
ipasub,({consonant}) 1ː,\1 ː,2,,,"this removes the 1 if the consonant is not followed by a short vowel"
# Shadda after a waw/yaa that was first read as vowel length: the 2ː / 3ː marker is turned back into a
# geminated w / j. The nunation variants additionally move the "V n" ending behind the geminate.
ipasub,({shortV-output}) 2ː ({shortV-output}) 1ː,\1 w ː \2,3,,,"controls for shadda taking priority over elongation of vowels (e.g. Dammah + waw) with following short vowel",
ipasub,({shortV-output}) 2ː 1ː,\1 w ː,3,,,"controls for shadda taking priority over elongation of vowels (e.g. dammah + waw)",
ipasub,({shortV-output}) 2ː u n 1ː,\1 w ː u n,4,,,"flips around the nunation and gemination",
ipasub,({shortV-output}) 2ː a n 1ː,\1 w ː a n,4,,,"flips around the nunation and gemination",
ipasub,({shortV-output}) 2ː i n 1ː,\1 w ː i n,4,,,"flips around the nunation and gemination",
ipasub,2ː,ː,2,,,"removes the 2 from the rest of the elongated /u/s",
ipasub,({shortV-output}) 3ː ({shortV-output}) 1ː,\1 j ː \2,3,,,"controls for shadda taking priority over elongation of vowels (e.g. kasrah + yaa) with following short vowel",
ipasub,({shortV-output}) 3ː 1ː,\1 j ː,3,,,"controls for shadda taking priority over elongation of vowels (e.g. kasrah + yaa)",
ipasub,({shortV-output}) 3ː i n 1ː,\1 j ː i n,4,,,"flips around the nunation and gemination",
ipasub,({shortV-output}) 3ː u n 1ː,\1 j ː u n,4,,,"flips around the nunation and gemination",
ipasub,({shortV-output}) 3ː a n 1ː,\1 j ː a n,4,,,"flips around the nunation and gemination",
# Any 1ː still left at this point is replaced by @.
ipasub,1ː,@,1,,,"rules out illegal combos (shadda appearing with nunnation word-medially over an alif)",
# precede and follow are both left empty here so the explanatory text sits in the comment column,
# matching the parallel 2ː rule above.
ipasub,3ː,ː,2,,,"removes the 3 from the rest of the elongated /i/s",
## Hamza (glottal stop)
# 1ʔ is the placeholder emitted for a combining hamza above; it is written after its carrier letter
# and is moved in front of that letter's output here, or replaces it when a short vowel follows.
ipasub,({hamza-combo-output}) 1ʔ,ʔ \1,3,,,"this puts the glottal stop before the character carrying the hamza",
ipasub,a ː 1ʔ ({shortV-output}),ʔ \1,4,,,"word-medial glottal stops are sometimes represented as alif topped with a hamza and a short vowel diacritic, the alif holds no value here",
ipasub,ʔ a ː ({shortV-output}),ʔ \1,2,,,"word-medial glottal stops are sometimes represented as alif topped with a hamza and a short vowel diacritic, the alif holds no value here (this is the same rule as the one above but uses the permanent alif-hamza-above character)",