jonathanjordan21 committed on
Commit
085f4e9
·
verified ·
1 Parent(s): 289aa14

Update idn_phonemes.py

Browse files
Files changed (1) hide show
  1. idn_phonemes.py +66 -3
idn_phonemes.py CHANGED
@@ -75,18 +75,81 @@ def number_to_words(n: int) -> str:
75
  else:
76
  return str(n) # fallback
77
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
def indo_to_ipa(text: str) -> str:
    """Transcribe Indonesian text into IPA symbols.

    The text is lowercased, every run of digits is replaced by the result
    of ``number_to_words``, and then each key of ``ipa_map`` is substituted
    by its value, longest keys first.

    Args:
        text: Input text to transcribe.

    Returns:
        The transcribed string.
    """

    def _digits_to_words(found):
        # Spell out one matched digit run.
        return number_to_words(int(found.group()))

    result = re.sub(r"\d+", _digits_to_words, text.lower())

    # Longest keys first so multi-letter graphemes are replaced before
    # their single-letter prefixes. Keys are passed to re.sub as patterns.
    for grapheme in sorted(ipa_map, key=len, reverse=True):
        result = re.sub(grapheme, ipa_map[grapheme], result)

    return result
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
  else:
76
  return str(n) # fallback
77
 
78
+
79
+
80
def expand_abbreviation(word: str) -> str:
    """Spell out an all-caps abbreviation using Indonesian letter names.

    A word is treated as an abbreviation when it is longer than one
    character and ``str.isupper()`` is true for it (e.g. KTP, DPR, RI).
    Each character is looked up, lowercased, in ``letter_words``; a
    character with no entry is kept as-is. The pieces are joined with
    single spaces. Any other word is returned unchanged.
    """
    is_abbreviation = len(word) > 1 and word.isupper()
    if not is_abbreviation:
        return word

    spelled = [letter_words.get(ch.lower(), ch) for ch in word]
    return " ".join(spelled)
85
+
86
+
87
# Spoken form of each Latin letter, keyed by the lowercase letter; used to
# spell out abbreviations one letter at a time.
letter_words = {
    "a": "a", "b": "be", "c": "ce", "d": "de", "e": "e",
    "f": "ef", "g": "ge", "h": "ha", "i": "i", "j": "je",
    "k": "ka", "l": "el", "m": "em", "n": "en", "o": "o",
    "p": "pe", "q": "ki", "r": "er", "s": "es", "t": "te",
    "u": "u", "v": "fe", "w": "we", "x": "eks", "y": "ye",
    "z": "zet",
}
115
+
116
+
117
def indo_to_ipa(text: str) -> str:
    """Transcribe Indonesian text into IPA symbols.

    Processing order:

    1. Whitespace-separated tokens are passed through
       ``expand_abbreviation``, which spells out all-caps tokens longer
       than one character (e.g. KTP, DPR, RI) and returns every other
       token unchanged.
    2. The text is lowercased.
    3. Every run of digits is replaced by ``number_to_words``.
    4. Each key of ``ipa_map`` is substituted by its value, longest keys
       first.

    Splitting and re-joining on whitespace collapses runs of whitespace
    into single spaces.

    Args:
        text: Input text to transcribe.

    Returns:
        The transcribed string.
    """
    # Abbreviations must be expanded BEFORE lowercasing: once the text is
    # lowercased, str.isupper() is false for every token and no
    # abbreviation would ever be spelled out.
    text = " ".join(expand_abbreviation(w) for w in text.split())
    text = text.lower()

    # Numbers -> words
    def replace_number(match):
        num = int(match.group())
        return number_to_words(num)

    text = re.sub(r"\d+", replace_number, text)

    # Letters -> IPA. Longest keys first so multi-letter graphemes are
    # replaced before their single-letter prefixes.
    # NOTE(review): keys are passed to re.sub as regex patterns, not
    # literals - confirm ipa_map contains no unintended metacharacters.
    for k in sorted(ipa_map.keys(), key=lambda x: -len(x)):
        text = re.sub(k, ipa_map[k], text)

    return text
140
+
141
+ # def indo_to_ipa(text: str) -> str:
142
+ # text = text.lower()
143
+
144
+ # # Find every number in the text and convert it to words
145
+ # def replace_number(match):
146
+ # num = int(match.group())
147
+ # return number_to_words(num)
148
+
149
+ # text = re.sub(r"\d+", replace_number, text)
150
+
151
+ # # Convert letters → IPA
152
+ # for k in sorted(ipa_map.keys(), key=lambda x: -len(x)):
153
+ # text = re.sub(k, ipa_map[k], text)
154
+
155
+ # return text