kambris committed on
Commit
96a8fca
·
verified ·
1 Parent(s): d5decbb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +116 -1
app.py CHANGED
@@ -99,6 +99,38 @@ latin_to_ipa = {
99
  "!": "!"
100
  }
101
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
102
  def clean_arabic_text(text):
103
  """Remove diacritics from Arabic text for translation"""
104
  if not text:
@@ -256,6 +288,52 @@ def latin_to_ipa_conversion(latin_text):
256
 
257
  return output
258
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
259
  def arabic_tts(arabic_text):
260
  """Generate TTS audio for Arabic text."""
261
  if not arabic_text or not arabic_text.strip():
@@ -486,7 +564,37 @@ with gr.Blocks(title="Arabic Transliterator with Google Translate") as demo:
486
  arabic_audio = gr.Audio(label="Arabic Audio", type="filepath")
487
  with gr.Column():
488
  ipa_audio = gr.Audio(label="IPA Pronunciation Audio (English reading)", type="filepath")
489
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
490
  # Manual translation section
491
  with gr.Accordion("🔍 Manual Translation", open=False):
492
  with gr.Row():
@@ -570,6 +678,13 @@ with gr.Blocks(title="Arabic Transliterator with Google Translate") as demo:
570
  inputs=latin_input,
571
  outputs=[arabic_output, ipa_output, arabic_audio, ipa_audio, translation_output]
572
  )
 
 
 
 
 
 
 
573
 
574
  # Manual translation handlers
575
  manual_translate_btn.click(
 
99
  "!": "!"
100
  }
101
 
102
# Buckwalter transliteration mappings: one Arabic character -> one ASCII symbol.
arabic_to_buckwalter = {
    # Basic letters
    'ا': 'A', 'ب': 'b', 'ت': 't', 'ث': 'v', 'ج': 'j', 'ح': 'H', 'خ': 'x',
    'د': 'd', 'ذ': '*', 'ر': 'r', 'ز': 'z', 'س': 's', 'ش': '$', 'ص': 'S',
    'ض': 'D', 'ط': 'T', 'ظ': 'Z', 'ع': 'E', 'غ': 'g', 'ف': 'f', 'ق': 'q',
    'ك': 'k', 'ل': 'l', 'م': 'm', 'ن': 'n', 'ه': 'h', 'و': 'w', 'ي': 'y',

    # Hamza variants
    'ء': "'", 'أ': '>', 'إ': '<', 'آ': '|', 'ؤ': '&', 'ئ': '}',

    # Taa marbuta
    'ة': 'p',

    # Alif maqsura
    'ى': 'Y',

    # Diacritics (written as escapes because combining marks are hard to
    # read and easy to corrupt when they appear bare inside quotes)
    '\u064e': 'a',  # fatha
    '\u064f': 'u',  # damma
    '\u0650': 'i',  # kasra
    '\u064b': 'F',  # fathatan
    '\u064c': 'N',  # dammatan
    '\u064d': 'K',  # kasratan
    '\u0651': '~',  # shadda
    '\u0652': 'o',  # sukun

    # Remaining symbols of the standard Buckwalter table
    '\u0670': '`',  # dagger (superscript) alif
    '\u0671': '{',  # alif wasla
    '\u0640': '_',  # tatweel (kashida)

    # Punctuation
    '،': ',', '؛': ';', '؟': '?', '«': '"', '»': '"',

    # Spaces and numbers
    ' ': ' ', '٠': '0', '١': '1', '٢': '2', '٣': '3', '٤': '4',
    '٥': '5', '٦': '6', '٧': '7', '٨': '8', '٩': '9',
}


def _invert_unambiguous(mapping):
    """Return the inverse of *mapping*, omitting values shared by several keys.

    A plain ``{v: k for k, v in mapping.items()}`` silently keeps whichever key
    comes last for a duplicated value. Here that would turn every ASCII ``"``
    into ``»`` because both guillemets transliterate to ``"``. Dropping such
    values lets the reverse converter pass them through unchanged instead.
    """
    inverse = {}
    ambiguous = set()
    for key, value in mapping.items():
        if value in inverse:
            ambiguous.add(value)
        inverse[value] = key
    for value in ambiguous:
        del inverse[value]
    return inverse


# Reverse mapping for Buckwalter to Arabic (only symbols with a unique source)
buckwalter_to_arabic = _invert_unambiguous(arabic_to_buckwalter)
133
+
134
  def clean_arabic_text(text):
135
  """Remove diacritics from Arabic text for translation"""
136
  if not text:
 
288
 
289
  return output
290
 
291
def arabic_to_buckwalter_convert(arabic_text):
    """Convert Arabic text to Buckwalter transliteration.

    Each character is looked up in the module-level ``arabic_to_buckwalter``
    table; characters without an entry are copied through unchanged.

    Args:
        arabic_text: Text to transliterate. ``None`` and ``""`` are accepted.

    Returns:
        The transliterated string, or ``""`` when the input is empty or None.
    """
    if not arabic_text:
        return ""

    # One pass with a single lookup per character; avoids repeated ``+=``.
    return "".join(arabic_to_buckwalter.get(char, char) for char in arabic_text)
304
+
305
def buckwalter_to_arabic_convert(buckwalter_text):
    """Convert Buckwalter transliteration to Arabic text.

    Each character is looked up in the module-level ``buckwalter_to_arabic``
    table; characters without an entry are copied through unchanged.

    Args:
        buckwalter_text: Buckwalter string to convert. ``None`` and ``""`` are
            accepted.

    Returns:
        The converted string, or ``""`` when the input is empty or None.
    """
    if not buckwalter_text:
        return ""

    # One pass with a single lookup per character; avoids repeated ``+=``.
    return "".join(buckwalter_to_arabic.get(char, char) for char in buckwalter_text)
318
+
319
def bidirectional_buckwalter_convert(input_text):
    """Detect the script of *input_text* and convert Arabic <-> Buckwalter.

    Detection is deliberately simple: if any character lies in the Unicode
    range U+0600..U+06FF the input is treated as Arabic and converted to
    Buckwalter; otherwise it is assumed to be Buckwalter and converted to
    Arabic.

    Args:
        input_text: Text to convert. ``None``, ``""`` and whitespace-only
            strings are treated as empty.

    Returns:
        A ``(converted_text, direction_label)`` tuple. Both elements are ``""``
        for empty input.
    """
    # Check for None before calling .strip(); this is the same guard
    # arabic_tts uses, and without it a None input raises AttributeError.
    if not input_text or not input_text.strip():
        return "", ""

    has_arabic = any('\u0600' <= char <= '\u06FF' for char in input_text)

    if has_arabic:
        return arabic_to_buckwalter_convert(input_text), "Arabic → Buckwalter"
    return buckwalter_to_arabic_convert(input_text), "Buckwalter → Arabic"
336
+
337
  def arabic_tts(arabic_text):
338
  """Generate TTS audio for Arabic text."""
339
  if not arabic_text or not arabic_text.strip():
 
564
  arabic_audio = gr.Audio(label="Arabic Audio", type="filepath")
565
  with gr.Column():
566
  ipa_audio = gr.Audio(label="IPA Pronunciation Audio (English reading)", type="filepath")
567
+
568
+ # Buckwalter transliteration section
569
+ with gr.Accordion("🔄 Buckwalter Transliteration", open=False):
570
+ gr.Markdown("**Bi-directional Arabic ↔ Buckwalter transliteration**")
571
+ gr.Markdown("Enter Arabic text or Buckwalter notation. The system will auto-detect and convert.")
572
+
573
+ with gr.Row():
574
+ with gr.Column():
575
+ buckwalter_input = gr.Textbox(
576
+ label="Input (Arabic or Buckwalter)",
577
+ placeholder="Enter Arabic: السلام عليكم OR Buckwalter: AlslAm Elyokm",
578
+ lines=3,
579
+ show_copy_button=True
580
+ )
581
+
582
+ with gr.Column():
583
+ buckwalter_output = gr.Textbox(
584
+ label="Converted Output",
585
+ lines=3,
586
+ show_copy_button=True
587
+ )
588
+
589
+ with gr.Row():
590
+ buckwalter_convert_btn = gr.Button("🔄 Convert", variant="primary")
591
+ conversion_direction = gr.Textbox(
592
+ label="Conversion Direction",
593
+ value="",
594
+ interactive=False,
595
+ max_lines=1
596
+ )
597
+
598
  # Manual translation section
599
  with gr.Accordion("🔍 Manual Translation", open=False):
600
  with gr.Row():
 
678
  inputs=latin_input,
679
  outputs=[arabic_output, ipa_output, arabic_audio, ipa_audio, translation_output]
680
  )
681
+
682
+ # Buckwalter conversion handler
683
+ buckwalter_convert_btn.click(
684
+ fn=bidirectional_buckwalter_convert,
685
+ inputs=buckwalter_input,
686
+ outputs=[buckwalter_output, conversion_direction]
687
+ )
688
 
689
  # Manual translation handlers
690
  manual_translate_btn.click(