kambris committed on
Commit
06e108c
·
verified ·
1 Parent(s): ddaedae

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +126 -175
app.py CHANGED
@@ -2,10 +2,7 @@ import gradio as gr
2
  from gtts import gTTS
3
  import tempfile
4
  import os
5
- import requests
6
- from bs4 import BeautifulSoup
7
- import re
8
- import urllib.parse
9
 
10
  # Enhanced transliteration map with proper Arabic characters
11
  translit_map = {
@@ -102,8 +99,11 @@ latin_to_ipa = {
102
  "!": "!"
103
  }
104
 
 
 
 
105
  def clean_arabic_text(text):
106
- """Remove diacritics from Arabic text for dictionary lookup"""
107
  if not text:
108
  return ""
109
 
@@ -115,166 +115,99 @@ def clean_arabic_text(text):
115
 
116
  return cleaned.strip()
117
 
118
- def lookup_dictionary(arabic_word):
119
  """
120
- Lookup Arabic word in the Arabic Lexicon dictionary
121
- Returns dictionary definition and related information
122
  """
123
- if not arabic_word or not arabic_word.strip():
124
- return "No word provided for lookup."
125
 
126
  try:
127
- # Clean the word for lookup
128
- clean_word = clean_arabic_text(arabic_word)
129
-
130
- # Encode the Arabic word for URL
131
- encoded_word = urllib.parse.quote(clean_word)
132
-
133
- # Construct the search URL for Arabic Lexicon with cat=9 (appears to be a specific category)
134
- search_url = f"https://arabiclexicon.hawramani.com/search/{encoded_word}?cat=9"
135
-
136
- # Set headers to mimic a browser request
137
- headers = {
138
- 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
139
- 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
140
- 'Accept-Language': 'en-US,en;q=0.5',
141
- 'Accept-Encoding': 'gzip, deflate',
142
- 'Connection': 'keep-alive'
143
- }
144
 
145
- # Make the request
146
- response = requests.get(search_url, headers=headers, timeout=15)
147
- response.raise_for_status()
148
 
149
- # Parse the HTML response
150
- soup = BeautifulSoup(response.content, 'html.parser')
151
 
152
- # Extract search results - based on the structure I can see
153
- results = []
 
 
 
 
154
 
155
- # Look for the main content spans that contain the definitions
156
- content_spans = soup.find_all('span', {'index': True})
 
157
 
158
- if content_spans:
159
- for span in content_spans[:2]: # Limit to first 2 spans
160
- text_content = span.get_text(strip=True)
161
- if text_content and len(text_content) > 20:
162
- # Clean up the text and format it nicely
163
- clean_text = text_content.replace('\n', ' ').replace(' ', ' ')
164
- results.append(clean_text[:500] + "..." if len(clean_text) > 500 else clean_text)
165
 
166
- # Fallback: look for any Arabic text content if spans don't work
167
- if not results:
168
- # Look for divs or other containers with Arabic text
169
- arabic_text_elements = soup.find_all(text=re.compile(r'[\u0600-\u06FF]{3,}'))
170
-
171
- for element in arabic_text_elements[:3]:
172
- parent_text = element.parent.get_text(strip=True) if element.parent else str(element)
173
- if len(parent_text) > 30 and clean_word in parent_text:
174
- clean_text = parent_text.replace('\n', ' ').replace(' ', ' ')
175
- results.append(clean_text[:400] + "..." if len(clean_text) > 400 else clean_text)
176
 
177
- if results:
178
- formatted_results = f"πŸ“– **Dictionary Results for '{arabic_word}':**\n\n"
179
- for i, result in enumerate(results, 1):
180
- formatted_results += f"**{i}.** {result}\n\n"
181
-
182
- formatted_results += f"\nπŸ”— **Full results:** [View on Arabic Lexicon]({search_url})"
183
- return formatted_results
184
- else:
185
- return f"πŸ“– No dictionary results found for '{arabic_word}'.\n\nπŸ”— **Try manual search:** [Search on Arabic Lexicon]({search_url})"
186
-
187
- except requests.exceptions.RequestException as e:
188
- return f"❌ Dictionary lookup failed: Network error. Please check your internet connection.\n\nError: {str(e)}"
189
  except Exception as e:
190
- return f"❌ Dictionary lookup failed: {str(e)}\n\nπŸ”— **Try manual search:** https://arabiclexicon.hawramani.com"
191
-
192
 
193
- def lookup_dictionary_alternative(arabic_word):
194
  """
195
- Alternative lookup method using the old search format as fallback
196
  """
197
- if not arabic_word or not arabic_word.strip():
198
- return "No word provided for lookup."
199
 
200
  try:
201
- clean_word = clean_arabic_text(arabic_word)
202
- encoded_word = urllib.parse.quote(clean_word)
203
-
204
- # Try the original search format as fallback
205
- search_url = f"https://arabiclexicon.hawramani.com/?search={encoded_word}&cat=9"
206
-
207
- headers = {
208
- 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
209
- }
210
-
211
- response = requests.get(search_url, headers=headers, timeout=15)
212
- response.raise_for_status()
213
-
214
- soup = BeautifulSoup(response.content, 'html.parser')
215
 
216
- # Look for search results in tables or divs
217
- results = []
218
 
219
- # Try to find table rows or result containers
220
- result_elements = soup.find_all(['tr', 'div', 'p'], string=re.compile(r'[\u0600-\u06FF]+'))
 
 
 
 
 
 
 
 
 
 
 
 
 
221
 
222
- for element in result_elements[:3]:
223
- text_content = element.get_text(strip=True)
224
- if text_content and len(text_content) > 20 and clean_word in text_content:
225
- clean_text = text_content.replace('\n', ' ').replace(' ', ' ')
226
- results.append(clean_text[:400] + "..." if len(clean_text) > 400 else clean_text)
227
 
228
- if results:
229
- formatted_results = f"πŸ“– **Dictionary Results for '{arabic_word}' (Alternative Search):**\n\n"
230
- for i, result in enumerate(results, 1):
231
- formatted_results += f"**{i}.** {result}\n\n"
232
-
233
- formatted_results += f"\nπŸ”— **Full results:** [View on Arabic Lexicon]({search_url})"
234
- return formatted_results
235
- else:
236
- return f"πŸ“– No results found with alternative search for '{arabic_word}'.\n\nπŸ”— **Try manual search:** [Search on Arabic Lexicon]({search_url})"
237
-
238
  except Exception as e:
239
- return f"❌ Alternative dictionary lookup failed: {str(e)}"
240
 
241
-
242
- def lookup_dictionary_with_fallback(arabic_word):
243
  """
244
- Main lookup function that tries the new format first, then falls back to the old format
245
  """
246
- # Try the new format first
247
- result = lookup_dictionary(arabic_word)
248
-
249
- # If no results found, try the alternative format
250
- if "No dictionary results found" in result:
251
- alternative_result = lookup_dictionary_alternative(arabic_word)
252
- if "No results found" not in alternative_result:
253
- return alternative_result
254
 
255
- return result
256
-
257
- def lookup_multiple_words(arabic_text):
258
- """
259
- Lookup multiple Arabic words separated by spaces
260
- """
261
- if not arabic_text or not arabic_text.strip():
262
- return "No text provided for lookup."
263
-
264
- words = arabic_text.strip().split()
265
- if len(words) == 1:
266
- return lookup_dictionary(words[0])
267
-
268
- results = []
269
- for word in words[:5]: # Limit to first 5 words to avoid overwhelming
270
- if len(word.strip()) > 1: # Skip single characters
271
- result = lookup_dictionary(word)
272
- results.append(f"### Word: {word}\n{result}\n" + "─"*50 + "\n")
273
-
274
- if results:
275
- return "\n".join(results)
276
- else:
277
- return "No valid words found for dictionary lookup."
278
 
279
  def generate_letter_audio(arabic_letter):
280
  """Generate TTS audio for a single Arabic letter or character."""
@@ -346,7 +279,6 @@ def arabic_tts(arabic_text):
346
 
347
  try:
348
  tts = gTTS(text=arabic_text, lang='ar', slow=False)
349
- # Create temporary file with proper cleanup
350
  with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
351
  temp_path = tmp_file.name
352
  tts.save(temp_path)
@@ -361,7 +293,6 @@ def ipa_tts(ipa_text):
361
  return None
362
 
363
  try:
364
- # Use English TTS for IPA reading
365
  tts = gTTS(text=ipa_text, lang='en', slow=True)
366
  with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
367
  temp_path = tmp_file.name
@@ -386,19 +317,19 @@ def full_process_with_ipa_audio(latin_text):
386
  ipa_audio = ipa_tts(ipa_text)
387
  return arabic_text, ipa_text, arabic_audio, ipa_audio
388
 
389
- def full_process_with_dictionary(latin_text):
390
- """Complete processing: transliterate, generate audio, and lookup dictionary."""
391
  arabic_text = transliterate(latin_text)
392
  ipa_text = latin_to_ipa_conversion(latin_text)
393
  arabic_audio = arabic_tts(arabic_text)
394
  ipa_audio = ipa_tts(ipa_text)
395
- dictionary_result = lookup_multiple_words(arabic_text)
396
- return arabic_text, ipa_text, arabic_audio, ipa_audio, dictionary_result
397
 
398
  # Create Gradio interface
399
- with gr.Blocks(title="Arabic Transliterator with Dictionary") as demo:
400
- gr.Markdown("## πŸ“ Latin-to-Arabic Transliterator with Dictionary Lookup")
401
- gr.Markdown("Enter Latin characters to convert to Arabic script, generate audio, and lookup meanings in Arabic dictionaries.")
402
 
403
  # Full-width transliteration guide with audio buttons
404
  with gr.Accordion("πŸ“– Interactive Transliteration Guide with Audio", open=False):
@@ -537,12 +468,12 @@ with gr.Blocks(title="Arabic Transliterator with Dictionary") as demo:
537
  placeholder="Shows how the Arabic sounds should be pronounced using IPA symbols"
538
  )
539
 
540
- # Dictionary lookup section
541
  with gr.Row():
542
- dictionary_output = gr.Textbox(
543
- label="πŸ“– Dictionary Lookup Results",
544
  lines=8,
545
- placeholder="Dictionary definitions and meanings will appear here...",
546
  show_copy_button=True
547
  )
548
 
@@ -551,12 +482,12 @@ with gr.Blocks(title="Arabic Transliterator with Dictionary") as demo:
551
  convert_btn = gr.Button("πŸ”„ Transliterate", variant="primary")
552
  tts_btn = gr.Button("πŸ”Š Generate Arabic Sound", variant="secondary")
553
  ipa_btn = gr.Button("πŸ”€ Generate IPA Text", variant="secondary")
554
- dict_btn = gr.Button("πŸ“– Dictionary Lookup", variant="secondary")
555
 
556
  with gr.Row():
557
  combined_btn = gr.Button("πŸ”„πŸ”Š Transliterate & Speak", variant="secondary")
558
  full_process_btn = gr.Button("πŸ”„πŸ”€πŸ”Š Full Process + IPA Audio", variant="secondary")
559
- complete_btn = gr.Button("πŸ”„πŸ“–πŸ”Š Complete Process + Dictionary", variant="primary")
560
 
561
  # Audio outputs
562
  with gr.Row():
@@ -565,23 +496,37 @@ with gr.Blocks(title="Arabic Transliterator with Dictionary") as demo:
565
  with gr.Column():
566
  ipa_audio = gr.Audio(label="IPA Pronunciation Audio (English reading)", type="filepath")
567
 
568
- # Manual dictionary lookup section
569
- with gr.Accordion("πŸ” Manual Dictionary Lookup", open=False):
570
  with gr.Row():
571
- manual_lookup_input = gr.Textbox(
572
- label="Enter Arabic word(s) to lookup",
573
- placeholder="Enter Arabic text for dictionary lookup",
574
- rtl=True
575
- )
576
- manual_lookup_btn = gr.Button("πŸ” Lookup", variant="secondary")
 
 
 
 
 
 
 
 
577
 
578
- manual_lookup_output = gr.Textbox(
579
- label="Manual Lookup Results",
580
  lines=6,
581
  show_copy_button=True
582
  )
 
 
 
 
 
 
583
 
584
- # External IPA resources - moved to bottom
585
  with gr.Accordion("🎯 Free IPA Pronunciation Tools (More Accurate)", open=False):
586
  gr.Markdown("""
587
  **🌐 Online (No Download Required):**
@@ -611,10 +556,10 @@ with gr.Blocks(title="Arabic Transliterator with Dictionary") as demo:
611
  outputs=arabic_audio
612
  )
613
 
614
- dict_btn.click(
615
- fn=lookup_multiple_words,
616
  inputs=arabic_output,
617
- outputs=dictionary_output
618
  )
619
 
620
  combined_btn.click(
@@ -630,16 +575,22 @@ with gr.Blocks(title="Arabic Transliterator with Dictionary") as demo:
630
  )
631
 
632
  complete_btn.click(
633
- fn=full_process_with_dictionary,
634
  inputs=latin_input,
635
- outputs=[arabic_output, ipa_output, arabic_audio, ipa_audio, dictionary_output]
 
 
 
 
 
 
 
636
  )
637
 
638
- # Manual lookup handlers
639
- manual_lookup_btn.click(
640
- fn=lookup_multiple_words,
641
- inputs=manual_lookup_input,
642
- outputs=manual_lookup_output
643
  )
644
 
645
  if __name__ == "__main__":
 
2
  from gtts import gTTS
3
  import tempfile
4
  import os
5
+ from googletrans import Translator
 
 
 
6
 
7
  # Enhanced transliteration map with proper Arabic characters
8
  translit_map = {
 
99
  "!": "!"
100
  }
101
 
102
+ # Initialize Google Translator
103
+ translator = Translator()
104
+
105
  def clean_arabic_text(text):
106
+ """Remove diacritics from Arabic text for translation"""
107
  if not text:
108
  return ""
109
 
 
115
 
116
  return cleaned.strip()
117
 
118
def google_translate_arabic(arabic_text):
    """
    Translate Arabic text to English using Google Translate.

    The input is passed through ``clean_arabic_text`` and the cleaned text is
    sent to the module-level ``translator`` twice: once for language
    detection and once for an Arabic -> English translation.

    Args:
        arabic_text: Text to translate.

    Returns:
        A Markdown-style report string containing the original text, the
        cleaned text, the detected language, the English translation and,
        when the translation object carries one, a pronunciation. A fixed
        message is returned for empty or whitespace-only input, and an error
        message string is returned if any step raises an ``Exception``.
    """
    if not arabic_text or not arabic_text.strip():
        return "No text provided for translation."

    try:
        clean_text = clean_arabic_text(arabic_text)

        # Detection runs before translation, as in the original flow.
        detection = translator.detect(clean_text)
        translation = translator.translate(clean_text, src='ar', dest='en')

        # Only apply the numeric format when a number was actually reported:
        # formatting a non-number with ":.2f" raises TypeError, which would
        # discard an already successful translation via the except below.
        # NOTE(review): some googletrans releases appear to report the
        # confidence as None - confirm against the pinned version.
        confidence = getattr(detection, "confidence", None)
        if isinstance(confidence, (int, float)):
            confidence_label = f"{confidence:.2f}"
        else:
            confidence_label = "n/a"

        result = "🌐 **Google Translate Results:**\n\n"
        result += f"**Original Arabic:** {arabic_text}\n\n"
        result += f"**Cleaned Text:** {clean_text}\n\n"
        result += f"**Detected Language:** {detection.lang} (confidence: {confidence_label})\n\n"
        result += f"**English Translation:** {translation.text}\n\n"

        # The pronunciation line is optional; it is skipped when the
        # attribute is missing or empty.
        if hasattr(translation, 'pronunciation') and translation.pronunciation:
            result += f"**Pronunciation:** {translation.pronunciation}\n\n"

        result += "**Translation Confidence:** Automatic detection\n"
        result += "**Source:** Google Translate"

        return result

    except Exception as e:
        # Failures are reported to the UI as text instead of being raised.
        return f"❌ Google Translate error: {e}\n\nPlease check your internet connection and try again."
 
154
 
155
def translate_multiple_words(arabic_text):
    """
    Translate Arabic text as a whole phrase and, for multi-word input, word by word.

    The whole text is first translated with ``google_translate_arabic``. When
    the text splits into more than one whitespace-separated word, up to the
    first five words are also translated individually through the
    module-level ``translator`` and appended to the phrase result.

    Args:
        arabic_text: Text to translate.

    Returns:
        The phrase report, optionally followed by a separator and a numbered
        list of per-word translations. A fixed message is returned for empty
        or whitespace-only input, and an error message string is returned if
        an ``Exception`` escapes the per-word handling.
    """
    if not arabic_text or not arabic_text.strip():
        return "No text provided for translation."

    try:
        full_translation = google_translate_arabic(arabic_text)

        words = arabic_text.strip().split()

        if len(words) > 1:
            individual_translations = []
            # Numbering follows the word's position among the first five
            # words, so skipped single-character words leave gaps.
            for i, word in enumerate(words[:5], 1):
                if len(word.strip()) > 1:  # Skip single characters
                    try:
                        word_translation = translator.translate(clean_arabic_text(word), src='ar', dest='en')
                        individual_translations.append(f"**{i}. {word}** β†’ {word_translation.text}")
                    except Exception:
                        # Best effort per word: one failed lookup must not
                        # drop the rest. ``Exception`` (not a bare except)
                        # lets KeyboardInterrupt/SystemExit propagate.
                        individual_translations.append(f"**{i}. {word}** β†’ (translation failed)")

            if individual_translations:
                result = full_translation + "\n\n" + "─"*50 + "\n\n"
                result += "πŸ“ **Individual Word Translations:**\n\n"
                result += "\n".join(individual_translations)
                return result

        return full_translation

    except Exception as e:
        return f"❌ Translation error: {e}"
189
 
190
def reverse_translate_english(english_text):
    """
    Translate English text to Arabic using Google Translate.

    Args:
        english_text: Text to translate.

    Returns:
        A 2-tuple ``(report, arabic_text)`` on every path. ``report`` is the
        Markdown-style message to display and ``arabic_text`` is the bare
        translation, or an empty string when there is no input or the
        translation raised.
    """
    if not english_text or not english_text.strip():
        # Return the same 2-tuple shape as the other paths: the click handler
        # binds this function to two output components, so a lone string
        # would not supply enough output values.
        return "No text provided for translation.", ""

    try:
        translation = translator.translate(english_text, src='en', dest='ar')

        result = "🌐 **English to Arabic Translation:**\n\n"
        result += f"**Original English:** {english_text}\n\n"
        result += f"**Arabic Translation:** {translation.text}\n\n"
        result += "**Source:** Google Translate"

        return result, translation.text

    except Exception as e:
        # Failures are reported to the UI as text instead of being raised.
        return f"❌ Translation error: {e}", ""
 
 
 
 
 
 
 
 
 
211
 
212
  def generate_letter_audio(arabic_letter):
213
  """Generate TTS audio for a single Arabic letter or character."""
 
279
 
280
  try:
281
  tts = gTTS(text=arabic_text, lang='ar', slow=False)
 
282
  with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
283
  temp_path = tmp_file.name
284
  tts.save(temp_path)
 
293
  return None
294
 
295
  try:
 
296
  tts = gTTS(text=ipa_text, lang='en', slow=True)
297
  with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
298
  temp_path = tmp_file.name
 
317
  ipa_audio = ipa_tts(ipa_text)
318
  return arabic_text, ipa_text, arabic_audio, ipa_audio
319
 
320
def full_process_with_translation(latin_text):
    """Run the whole pipeline for one Latin-script input.

    Calls, in order: ``transliterate`` and ``latin_to_ipa_conversion`` on the
    input, ``arabic_tts`` and ``ipa_tts`` on those two results, and
    ``translate_multiple_words`` on the transliterated text.

    Returns:
        A 5-tuple ``(arabic_text, ipa_text, arabic_audio, ipa_audio,
        translation_result)`` built from those calls, in that order.
    """
    arabic = transliterate(latin_text)
    ipa = latin_to_ipa_conversion(latin_text)
    spoken_arabic = arabic_tts(arabic)
    spoken_ipa = ipa_tts(ipa)
    # The translation is evaluated last, matching the original call order.
    return (
        arabic,
        ipa,
        spoken_arabic,
        spoken_ipa,
        translate_multiple_words(arabic),
    )
328
 
329
  # Create Gradio interface
330
+ with gr.Blocks(title="Arabic Transliterator with Google Translate") as demo:
331
+ gr.Markdown("## πŸ“ Latin-to-Arabic Transliterator with Google Translate")
332
+ gr.Markdown("Enter Latin characters to convert to Arabic script, generate audio, and get translations using Google Translate.")
333
 
334
  # Full-width transliteration guide with audio buttons
335
  with gr.Accordion("πŸ“– Interactive Transliteration Guide with Audio", open=False):
 
468
  placeholder="Shows how the Arabic sounds should be pronounced using IPA symbols"
469
  )
470
 
471
+ # Translation section (replaces dictionary)
472
  with gr.Row():
473
+ translation_output = gr.Textbox(
474
+ label="🌐 Google Translate Results",
475
  lines=8,
476
+ placeholder="Translations and meanings will appear here...",
477
  show_copy_button=True
478
  )
479
 
 
482
  convert_btn = gr.Button("πŸ”„ Transliterate", variant="primary")
483
  tts_btn = gr.Button("πŸ”Š Generate Arabic Sound", variant="secondary")
484
  ipa_btn = gr.Button("πŸ”€ Generate IPA Text", variant="secondary")
485
+ translate_btn = gr.Button("🌐 Google Translate", variant="secondary")
486
 
487
  with gr.Row():
488
  combined_btn = gr.Button("πŸ”„πŸ”Š Transliterate & Speak", variant="secondary")
489
  full_process_btn = gr.Button("πŸ”„πŸ”€πŸ”Š Full Process + IPA Audio", variant="secondary")
490
+ complete_btn = gr.Button("πŸ”„πŸŒπŸ”Š Complete Process + Translation", variant="primary")
491
 
492
  # Audio outputs
493
  with gr.Row():
 
496
  with gr.Column():
497
  ipa_audio = gr.Audio(label="IPA Pronunciation Audio (English reading)", type="filepath")
498
 
499
+ # Manual translation section
500
+ with gr.Accordion("πŸ” Manual Translation", open=False):
501
  with gr.Row():
502
+ with gr.Column():
503
+ manual_arabic_input = gr.Textbox(
504
+ label="Enter Arabic text to translate",
505
+ placeholder="Enter Arabic text for translation",
506
+ rtl=True
507
+ )
508
+ manual_translate_btn = gr.Button("🌐 Translate to English", variant="secondary")
509
+
510
+ with gr.Column():
511
+ manual_english_input = gr.Textbox(
512
+ label="Enter English text to translate",
513
+ placeholder="Enter English text for Arabic translation"
514
+ )
515
+ reverse_translate_btn = gr.Button("🌐 Translate to Arabic", variant="secondary")
516
 
517
+ manual_translation_output = gr.Textbox(
518
+ label="Manual Translation Results",
519
  lines=6,
520
  show_copy_button=True
521
  )
522
+
523
+ manual_arabic_result = gr.Textbox(
524
+ label="Translated Arabic Text",
525
+ rtl=True,
526
+ visible=False
527
+ )
528
 
529
+ # External IPA resources
530
  with gr.Accordion("🎯 Free IPA Pronunciation Tools (More Accurate)", open=False):
531
  gr.Markdown("""
532
  **🌐 Online (No Download Required):**
 
556
  outputs=arabic_audio
557
  )
558
 
559
+ translate_btn.click(
560
+ fn=translate_multiple_words,
561
  inputs=arabic_output,
562
+ outputs=translation_output
563
  )
564
 
565
  combined_btn.click(
 
575
  )
576
 
577
  complete_btn.click(
578
+ fn=full_process_with_translation,
579
  inputs=latin_input,
580
+ outputs=[arabic_output, ipa_output, arabic_audio, ipa_audio, translation_output]
581
+ )
582
+
583
+ # Manual translation handlers
584
+ manual_translate_btn.click(
585
+ fn=translate_multiple_words,
586
+ inputs=manual_arabic_input,
587
+ outputs=manual_translation_output
588
  )
589
 
590
+ reverse_translate_btn.click(
591
+ fn=reverse_translate_english,
592
+ inputs=manual_english_input,
593
+ outputs=[manual_translation_output, manual_arabic_result]
 
594
  )
595
 
596
  if __name__ == "__main__":