File size: 7,546 Bytes
4a08ba7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 |
# Arabic Rule Set
# Written by: Emily
# Last Updated: 2019-12-04
type,sfrom,sto,weight,precede,follow,comment
# Class Rules
# A `class` row defines a named pattern: column 2 is the class name and
# column 3 is its regex-style definition. Other rows refer to a class as {name}.
# Output-side consonant inventory, used by the ipasub gemination rules.
class,consonant,(b|t|θ|dʒ|ħ|x|d|ð|r|z|s|ʃ|sˤ|dˤ|tˤ|ðˤ|ʕ|ɣ|f|q|k|l|m|n|h|w|j|ʔ),,,,
# Input-side short-vowel diacritics (used as a `follow` context by the sub rules).
class,shortV,[ ُِ َ],,,,
# Output-side short vowels (used by the ipasub rules).
class,shortV-output,[aiu],,,,
# Any input-side diacritic (used as a context for ta-marbuta and nunation rules).
class,diacritic,[ ْٕٔ ًٍّٓ ٌ ِٰ َُ],,,,
# Letters that can carry a combining hamza, and the outputs they transcribe to.
class,hamza-combo,[اوي],,,,
class,hamza-combo-output,(j|w|a ː),,,,
# Glides that can close a diphthong on the output side.
class,diphthong-combo,[jw],,,,
## Graphemes
# One single-character class per Arabic letter. The class name is either the
# IPA value the letter is transcribed to or a mnemonic (emph-*, w-uu, j-ii, ...);
# the sub rules reference these names as {name}.
class,alif,[ا],,,,
class,b,[ب],,,,
class,t,[ت],,,,
class,θ,[ث],,,,
class,dʒ,[ج],,,,
class,ħ,[ح],,,,
class,x,[خ],,,,
class,d,[د],,,,
class,ð,[ذ],,,,
class,r,[ر],,,,
class,z,[ز],,,,
class,s,[س],,,,
class,ʃ,[ش],,,,
class,emph-s,[ص],,,,
class,emph-d,[ض],,,,
class,emph-t,[ط],,,,
class,emph-ð,[ظ],,,,
class,ʕ,[ع],,,,
class,ɣ,[غ],,,,
class,f,[ف],,,,
class,q,[ق],,,,
class,k,[ك],,,,
class,l,[ل],,,,
class,m,[م],,,,
class,n,[ن],,,,
class,h,[ه],,,,
# waw and yaa serve both as glides and as long-vowel carriers (see the
# diphthong and long-vowel sub rules).
class,w-uu,[و],,,,
class,j-ii,[ي],,,,
class,hamza,[ء],,,,
class,ta-marbuta,[ة],,,,
class,alif-maq,[ى],,,,
## Diacritics
# One single-character class per combining mark; every row carries the same
# seven columns as the header (type,sfrom,sto,weight,precede,follow,comment).
# Short vowels
class,u,[ُ],,,,
class,a,[َ],,,,
class,i,[ِ],,,,
# Superscript (dagger) alif
class,sup-alif,[ٰ],,,,
# Combining hamza above / below and madda
class,dia-hamza-above,[ٔ],,,,
class,dia-hamza-below,[ٕ],,,,
class,madda,[ٓ],,,,
# Gemination mark and vowel-absence mark
class,shadda,[ّ],,,,
class,sukun,[ْ],,,,
# Nunation (tanwin) marks. Each class name states the vowel of the "V n"
# output produced by the matching sub rule, so the glyph must agree with it:
# fathatan (U+064B) -> a n, kasratan (U+064D) -> i n, dammatan (U+064C) -> u n.
class,a-nunnation,[ً],,,,
class,i-nunnation,[ٍ],,,,
class,u-nunnation,[ٌ],,,,
## Permanent Diacritic Grapheme Combos (some diacritics are not treated as separate from the grapheme)
# These classes match the single precomposed characters; the combining-mark
# equivalents are covered by dia-hamza-above, dia-hamza-below and madda.
class,alif-hamza-above,[أ],,,,
class,alif-hamza-below,[إ],,,,
class,w-hamza,[ؤ],,,,
class,j-hamza,[ئ],,,,
class,alif-madda,[آ],,,,
# Sub Rules
# A `sub` row rewrites the input matched by column 2 (a {class} reference) into
# the output in column 3. Columns 5 and 6 restrict the match by what precedes
# or follows it; ^ and $ are used there as word-start / word-end anchors.
# NOTE(review): several rows for the same class differ only in weight and
# context - presumably the higher weight takes priority; confirm against the
# rule engine.
## Graphemes
# Default: alif is a long /a/; the word-initial variant yields a glottal stop.
sub,{alif},a ː,2,,,
sub,{alif},ʔ,3,^,,"word-initial alif is /ʔ/, however, not every word-initial alif is realized as such (primarily the hamza is used) [@Coulmas2003, p. 123] - somewhat compromises the transcription",
# Plain one-to-one consonant mappings.
sub,{b},b,2,,,
sub,{t},t,2,,,
sub,{θ},θ,2,,,
sub,{dʒ},dʒ,2,,,
sub,{ħ},ħ,2,,,
sub,{x},x,2,,,
sub,{d},d,2,,,
sub,{ð},ð,2,,,
sub,{r},r,2,,,
sub,{z},z,2,,,
sub,{s},s,2,,,
sub,{ʃ},ʃ,2,,,
sub,{emph-s},sˤ,2,,,
sub,{emph-d},dˤ,2,,,
sub,{emph-t},tˤ,2,,,
sub,{emph-ð},ðˤ,2,,,
sub,{ʕ},ʕ,2,,,
sub,{ɣ},ɣ,2,,,
sub,{f},f,2,,,
sub,{q},q,2,,,
sub,{k},k,2,,,
sub,{l},l,2,,,
sub,{m},m,2,,,
sub,{n},n,2,,,
sub,{h},h,2,,,
# Default glide readings; the diphthong and long-vowel rules override these
# after a short vowel.
sub,{w-uu},w,2,,,
sub,{j-ii},j,2,,,
sub,{hamza},ʔ,2,,,
# ta-marbuta: /t/ before a word-final diacritic (which is then dropped),
# otherwise silent at the end of the word.
sub,{ta-marbuta},t,3,,{diacritic}$,"ta-marbuta occurs word-finally and if followed by a diacritic is recognized as /t/",
sub,{diacritic},,3,{ta-marbuta},$,"clean-up",
sub,{ta-marbuta},,2,,$,"ta-marbuta transcribes to nothing when not followed by a diacritic",
sub,{alif-maq},a,2,,$,"alif-maq occurs word-finally",
## Diacritics
# Short vowels and superscript alif map directly to their vowel.
sub,{u},u,2,,,
sub,{a},a,2,,,
sub,{i},i,2,,,
sub,{sup-alif},a ː,2,,,
# Combining hamza: emits the placeholder 1ʔ after its carrier; the ipasub
# hamza rules later move the glottal stop in front of the carrier.
sub,{dia-hamza-above},1ʔ,6,{hamza-combo},,"this transcribes the hamza sequences as the consonant followed by a glottal stop, but we need an ipasub rule to make the glottal stop precede the consonant",
sub,{alif},,4,^,{dia-hamza-above},"alif-hamza (above) only corresponds to glottal stop word-initially, so the alif shouldn't be transcribed",
# Alif + hamza below: the alif row produces the whole output and the
# diacritic itself is then deleted.
sub,{alif},ʔ i,4,,{dia-hamza-below},
sub,{alif},ʔ,5,^,{dia-hamza-below},"alif-hamza (below) only corresponds to glottal stop word-initially, so the alif shouldn't be transcribed",
sub,{dia-hamza-below},,2,{alif},,"clean-up",
sub,{alif},ʔ a ː,3,,{madda},
# Shadda emits the placeholder 1ː, resolved by the ipasub gemination rules.
sub,{shadda},1ː,2,,,"needed for ipasub gemination rules below",
# Sukun contributes no output.
sub,{sukun},,2,,,
# Nunation: "vowel + n", only word-finally or before a single word-final diacritic.
sub,{a-nunnation},a n,3,,$,
sub,{a-nunnation},a n,3,,{diacritic}$,"sometimes followed by a diacritic (shadda) which both apply to the consonant [@Habash2010, p. 11]",
sub,{i-nunnation},i n,3,,$,
sub,{i-nunnation},i n,3,,{diacritic}$,"sometimes followed by a diacritic (shadda) which both apply to the consonant (ibid.)",
sub,{u-nunnation},u n,3,,$,
sub,{u-nunnation},u n,3,,{diacritic}$,"sometimes followed by a diacritic (shadda) which both apply to the consonant (ibid.)",
## Permanent Diacritic Grapheme Combos
# Precomposed alif-hamza: glottal stop plus vowel by default, bare glottal
# stop when word-initial (the ^ rows).
sub,{alif-hamza-above},ʔ a ː,3,,,
sub,{alif-hamza-above},ʔ,4,^,,"alif-hamza (above) only corresponds to glottal stop word-initially - alif is a place holder",
sub,{alif-hamza-below},ʔ i,3,,,
sub,{alif-hamza-below},ʔ,4,^,,"alif-hamza (below) only corresponds to glottal stop word-initially - alif is a place holder",
# Hamza on waw / yaa: glottal stop followed by the glide.
sub,{w-hamza},ʔ w,2,,,
sub,{j-hamza},ʔ j,2,,,
sub,{alif-madda},ʔ a ː,2,,,
## Diphthongs [@Javed, p. 6]
# Fatha followed by waw / yaa becomes aw / aj. Each pair is one rule that
# rewrites the vowel plus a clean-up rule that deletes the glide letter so it
# is not transcribed a second time.
sub,{a},aw,3,,{w-uu},
sub,{w-uu},,3,{a},,"clean-up",
sub,{a},aj,3,,{j-ii},
sub,{j-ii},,3,{a},,"clean-up",
# Additional Long Vowel Transcriptions
# A short vowel followed by its matching letter becomes a long vowel; the
# letter is removed by the paired clean-up row. For damma+waw and kasra+yaa
# the length mark is written as the placeholder 2ː / 3ː so the ipasub rules
# can tell which letter produced it.
sub,{alif},,8,^,{shortV},"word-initial vowels are represented by an inflected alif [@Habash2012, p. 712]",
sub,{a},a ː,5,,{alif},
sub,{alif},,4,{a},,"clean-up",
sub,{a},a ː,5,,{alif-maq},
sub,{alif-maq},,4,{a},,"clean-up",
sub,{u},u 2ː,5,,{w-uu},
sub,{w-uu},,4,{u},,"clean-up",
sub,{i},i 3ː,5,,{j-ii},
sub,{j-ii},,4,{i},,"clean-up",
## Alif-maqsura
# Before a shadda (optionally with a short vowel in between) alif-maqsura is
# transcribed as a geminate /j/; the last row deletes the shadda in the
# short-vowel case.
sub,{alif-maq},j ː,6,,{shortV}{shadda},"alif-maq changes into a yaa if followed by a diacritic (generally a shadda) [@Habash2010, p. 61]",
sub,{alif-maq},j ː,6,,{shadda},
sub,{shadda},,6,{alif-maq}{shortV},,
# ipasub Rules
# These rows match on the transcribed output rather than on the Arabic input,
# and resolve the numbered placeholders emitted by the sub rules:
#   1ː  shadda (gemination of the preceding consonant)
#   2ː  length that came from damma + waw
#   3ː  length that came from kasra + yaa
## Consonant Gemination
# NOTE(review): {consonant} is itself a parenthesised alternation, so
# ({consonant}) may contribute two capture groups. The first rule below refers
# to the group after it as \2 while the second uses \3 - confirm which
# numbering the rule engine applies and align the two rules.
ipasub,({consonant}) ({shortV-output}) 1ː,\1 ː \2,3,,,"controls for shadda geminating vowels depending on if the double consonant is also followed by a short vowel",
ipasub,({consonant}) a 1ː ({diphthong-combo}),\1 ː a\3,4,,,"controls for shadda geminating vowels depending on if the double consonant is also followed by a diphthong",
ipasub,({consonant}) 1ː,\1 ː,2,,,"this removes the 1 if the consonant is not followed by a short vowel"
# Shadda on waw after damma: the waw is a geminate glide, not vowel length.
ipasub,({shortV-output}) 2ː ({shortV-output}) 1ː,\1 w ː \2,3,,,"controls for shadda taking priority over elongation of vowels (e.g. Dammah + waw) with following short vowel",
ipasub,({shortV-output}) 2ː 1ː,\1 w ː,3,,,"controls for shadda taking priority over elongation of vowels (e.g. dammah + waw)",
ipasub,({shortV-output}) 2ː u n 1ː,\1 w ː u n,4,,,"flips around the nunation and gemination",
ipasub,({shortV-output}) 2ː a n 1ː,\1 w ː a n,4,,,"flips around the nunation and gemination",
ipasub,({shortV-output}) 2ː i n 1ː,\1 w ː i n,4,,,"flips around the nunation and gemination",
ipasub,2ː,ː,2,,,"removes the 2 from the rest of the elongated /u/s",
# Shadda on yaa after kasra: same treatment with a geminate /j/.
ipasub,({shortV-output}) 3ː ({shortV-output}) 1ː,\1 j ː \2,3,,,"controls for shadda taking priority over elongation of vowels (e.g. kasrah + yaa) with following short vowel",
ipasub,({shortV-output}) 3ː 1ː,\1 j ː,3,,,"controls for shadda taking priority over elongation of vowels (e.g. kasrah + yaa)",
ipasub,({shortV-output}) 3ː i n 1ː,\1 j ː i n,4,,,"flips around the nunation and gemination",
ipasub,({shortV-output}) 3ː u n 1ː,\1 j ː u n,4,,,"flips around the nunation and gemination",
ipasub,({shortV-output}) 3ː a n 1ː,\1 j ː a n,4,,,"flips around the nunation and gemination",
# Any 1ː still present was not consumed by a rule above; it is replaced with @.
ipasub,1ː,@,1,,,"rules out illegal combos (shadda appearing with nunnation word-medially over an alif)",
# Both context columns must be present (and empty) so the explanatory text sits
# in the comment column, as in the matching 2ː rule above.
ipasub,3ː,ː,2,,,"removes the 3 from the rest of the elongated /i/s",
## Hamza (glottal stop)
# Resolves the 1ʔ placeholder emitted for a combining hamza: the glottal stop
# is moved in front of its carrier, or replaces an alif carrier entirely when
# a short vowel follows.
ipasub,({hamza-combo-output}) 1ʔ,ʔ \1,3,,,"this puts the glottal stop before the character carrying the hamza",
ipasub,a ː 1ʔ ({shortV-output}),ʔ \1,4,,,"word-medial glottal stops are sometimes represented as alif topped with a hamza and a short vowel diacritic, the alif holds no value here",
ipasub,ʔ a ː ({shortV-output}),ʔ \1,2,,,"word-medial glottal stops are sometimes represented as alif topped with a hamza and a short vowel diacritic, the alif holds no value here (this is the same rule as the one above but uses the permanent alf-hamza-above character)",
|