Update app.py
Browse files
app.py
CHANGED
|
@@ -99,6 +99,38 @@ latin_to_ipa = {
|
|
| 99 |
"!": "!"
|
| 100 |
}
|
| 101 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 102 |
def clean_arabic_text(text):
|
| 103 |
"""Remove diacritics from Arabic text for translation"""
|
| 104 |
if not text:
|
|
@@ -256,6 +288,52 @@ def latin_to_ipa_conversion(latin_text):
|
|
| 256 |
|
| 257 |
return output
|
| 258 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 259 |
def arabic_tts(arabic_text):
|
| 260 |
"""Generate TTS audio for Arabic text."""
|
| 261 |
if not arabic_text or not arabic_text.strip():
|
|
@@ -486,7 +564,37 @@ with gr.Blocks(title="Arabic Transliterator with Google Translate") as demo:
|
|
| 486 |
arabic_audio = gr.Audio(label="Arabic Audio", type="filepath")
|
| 487 |
with gr.Column():
|
| 488 |
ipa_audio = gr.Audio(label="IPA Pronunciation Audio (English reading)", type="filepath")
|
| 489 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 490 |
# Manual translation section
|
| 491 |
with gr.Accordion("🔍 Manual Translation", open=False):
|
| 492 |
with gr.Row():
|
|
@@ -570,6 +678,13 @@ with gr.Blocks(title="Arabic Transliterator with Google Translate") as demo:
|
|
| 570 |
inputs=latin_input,
|
| 571 |
outputs=[arabic_output, ipa_output, arabic_audio, ipa_audio, translation_output]
|
| 572 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 573 |
|
| 574 |
# Manual translation handlers
|
| 575 |
manual_translate_btn.click(
|
|
|
|
| 99 |
"!": "!"
|
| 100 |
}
|
| 101 |
|
| 102 |
+
# Buckwalter transliteration mappings
|
| 103 |
+
arabic_to_buckwalter = {
    # Basic letters
    'ا': 'A', 'ب': 'b', 'ت': 't', 'ث': 'v', 'ج': 'j', 'ح': 'H', 'خ': 'x',
    'د': 'd', 'ذ': '*', 'ر': 'r', 'ز': 'z', 'س': 's', 'ش': '$', 'ص': 'S',
    'ض': 'D', 'ط': 'T', 'ظ': 'Z', 'ع': 'E', 'غ': 'g', 'ف': 'f', 'ق': 'q',
    'ك': 'k', 'ل': 'l', 'م': 'm', 'ن': 'n', 'ه': 'h', 'و': 'w', 'ي': 'y',

    # Hamza variants
    'ء': "'", 'أ': '>', 'إ': '<', 'آ': '|', 'ؤ': '&', 'ئ': '}',

    # Taa marbuta
    'ة': 'p',

    # Alif maqsura
    'ى': 'Y',

    # Diacritics. Written as escapes because bare combining marks inside
    # quotes are nearly invisible and easy to corrupt when editing.
    '\u064E': 'a',  # fatha
    '\u064F': 'u',  # damma
    '\u0650': 'i',  # kasra
    '\u064B': 'F',  # fathatan
    '\u064C': 'N',  # dammatan
    '\u064D': 'K',  # kasratan
    '\u0651': '~',  # shadda
    '\u0652': 'o',  # sukun

    # Remaining symbols of the standard Buckwalter table
    '\u0640': '_',  # tatweel (kashida)
    '\u0670': '`',  # dagger (superscript) alif
    '\u0671': '{',  # alif wasla

    # Punctuation
    '،': ',', '؛': ';', '؟': '?', '«': '"', '»': '"',

    # Spaces and numbers
    ' ': ' ', '٠': '0', '١': '1', '٢': '2', '٣': '3', '٤': '4',
    '٥': '5', '٦': '6', '٧': '7', '٨': '8', '٩': '9',
}

# Reverse mapping for Buckwalter to Arabic.
# NOTE: '«' and '»' both transliterate to '"', so the inversion is lossy for
# that one symbol: the later entry wins and '"' maps back to '»'.
buckwalter_to_arabic = {v: k for k, v in arabic_to_buckwalter.items()}
|
| 133 |
+
|
| 134 |
def clean_arabic_text(text):
|
| 135 |
"""Remove diacritics from Arabic text for translation"""
|
| 136 |
if not text:
|
|
|
|
| 288 |
|
| 289 |
return output
|
| 290 |
|
| 291 |
+
def arabic_to_buckwalter_convert(arabic_text):
    """Convert Arabic text to Buckwalter transliteration.

    Each character is looked up in the module-level ``arabic_to_buckwalter``
    table; characters without an entry are copied through unchanged.

    Args:
        arabic_text: Text to transliterate. ``None`` or an empty string
            yields an empty string.

    Returns:
        The transliterated string.
    """
    if not arabic_text:
        return ""

    # One join instead of repeated `+=`, which is quadratic in the worst case.
    return "".join(arabic_to_buckwalter.get(char, char) for char in arabic_text)
|
| 304 |
+
|
| 305 |
+
def buckwalter_to_arabic_convert(buckwalter_text):
    """Convert Buckwalter transliteration to Arabic text.

    Each character is looked up in the module-level ``buckwalter_to_arabic``
    table; characters without an entry are copied through unchanged.

    Args:
        buckwalter_text: Buckwalter-encoded text. ``None`` or an empty
            string yields an empty string.

    Returns:
        The converted string.
    """
    if not buckwalter_text:
        return ""

    # One join instead of repeated `+=`, which is quadratic in the worst case.
    return "".join(buckwalter_to_arabic.get(char, char) for char in buckwalter_text)
|
| 318 |
+
|
| 319 |
+
def bidirectional_buckwalter_convert(input_text):
    """Automatically detect and convert between Arabic and Buckwalter.

    Args:
        input_text: Arabic script or Buckwalter notation. ``None``, empty,
            or whitespace-only input is treated as "nothing to convert".

    Returns:
        A ``(converted_text, direction_label)`` tuple. Both elements are
        empty strings when there is nothing to convert.
    """
    # Guard against None as well as blank text; calling .strip() on None
    # would raise AttributeError.
    if not input_text or not input_text.strip():
        return "", ""

    # Simple detection: any character in the U+0600-U+06FF range means the
    # input is treated as Arabic; otherwise it is assumed to be Buckwalter.
    has_arabic = any('\u0600' <= char <= '\u06FF' for char in input_text)

    if has_arabic:
        return arabic_to_buckwalter_convert(input_text), "Arabic → Buckwalter"
    return buckwalter_to_arabic_convert(input_text), "Buckwalter → Arabic"
|
| 336 |
+
|
| 337 |
def arabic_tts(arabic_text):
|
| 338 |
"""Generate TTS audio for Arabic text."""
|
| 339 |
if not arabic_text or not arabic_text.strip():
|
|
|
|
| 564 |
arabic_audio = gr.Audio(label="Arabic Audio", type="filepath")
|
| 565 |
with gr.Column():
|
| 566 |
ipa_audio = gr.Audio(label="IPA Pronunciation Audio (English reading)", type="filepath")
|
| 567 |
+
|
| 568 |
+
# Buckwalter transliteration section
|
| 569 |
+
with gr.Accordion("🔄 Buckwalter Transliteration", open=False):
|
| 570 |
+
gr.Markdown("**Bi-directional Arabic ↔ Buckwalter transliteration**")
|
| 571 |
+
gr.Markdown("Enter Arabic text or Buckwalter notation. The system will auto-detect and convert.")
|
| 572 |
+
|
| 573 |
+
with gr.Row():
|
| 574 |
+
with gr.Column():
|
| 575 |
+
buckwalter_input = gr.Textbox(
|
| 576 |
+
label="Input (Arabic or Buckwalter)",
|
| 577 |
+
placeholder="Enter Arabic: السلام عليكم OR Buckwalter: AlslAm Elyokm",
|
| 578 |
+
lines=3,
|
| 579 |
+
show_copy_button=True
|
| 580 |
+
)
|
| 581 |
+
|
| 582 |
+
with gr.Column():
|
| 583 |
+
buckwalter_output = gr.Textbox(
|
| 584 |
+
label="Converted Output",
|
| 585 |
+
lines=3,
|
| 586 |
+
show_copy_button=True
|
| 587 |
+
)
|
| 588 |
+
|
| 589 |
+
with gr.Row():
|
| 590 |
+
buckwalter_convert_btn = gr.Button("🔄 Convert", variant="primary")
|
| 591 |
+
conversion_direction = gr.Textbox(
|
| 592 |
+
label="Conversion Direction",
|
| 593 |
+
value="",
|
| 594 |
+
interactive=False,
|
| 595 |
+
max_lines=1
|
| 596 |
+
)
|
| 597 |
+
|
| 598 |
# Manual translation section
|
| 599 |
with gr.Accordion("🔍 Manual Translation", open=False):
|
| 600 |
with gr.Row():
|
|
|
|
| 678 |
inputs=latin_input,
|
| 679 |
outputs=[arabic_output, ipa_output, arabic_audio, ipa_audio, translation_output]
|
| 680 |
)
|
| 681 |
+
|
| 682 |
+
# Buckwalter conversion handler
|
| 683 |
+
buckwalter_convert_btn.click(
|
| 684 |
+
fn=bidirectional_buckwalter_convert,
|
| 685 |
+
inputs=buckwalter_input,
|
| 686 |
+
outputs=[buckwalter_output, conversion_direction]
|
| 687 |
+
)
|
| 688 |
|
| 689 |
# Manual translation handlers
|
| 690 |
manual_translate_btn.click(
|