import re


# Latin punctuation marks that have a dedicated Arabic counterpart.
_LATIN_TO_ARABIC_PUNCT = str.maketrans({"?": "؟", ",": "،", ";": "؛"})

# Characters outside this class are deleted: word characters, whitespace,
# the Arabic Unicode block, and the sentence punctuation listed explicitly.
_DISALLOWED_CHARS = re.compile(r"[^\w\s\u0600-\u06FF،؛؟.!:]")
_WHITESPACE_RUN = re.compile(r"\s+")

# `\w` only covers alphanumerics, so the Arabic combining diacritics are
# added explicitly; otherwise a vowelled word would look like a word boundary.
_ARABIC_WORD_CHAR = r"[\w\u064B-\u065F\u0670]"
# "ان" only as a standalone word followed by whitespace (never inside "كان").
_STANDALONE_AN = re.compile(r"(?<!" + _ARABIC_WORD_CHAR + r")ان(?=\s)")
# "إلي" only when it ends the word (never inside "إليه" or "إلينا").
_WORD_FINAL_ILY = re.compile(r"إلي(?!" + _ARABIC_WORD_CHAR + r")")

# Plain substring substitutions, applied in order.
# NOTE(review): the first three entries unconditionally rewrite masculine
# forms to feminine ones; kept as in the original - confirm this is intended.
_PHRASE_REPLACEMENTS = (
    (" هو ", " هي "),
    (" قال ", " قالت "),
    (" هذا ", " هذه "),
    (" لكن ", " لكنّ "),
    ("اللّه", "الله"),
)

# A text ending with one of these does not get a trailing period appended.
_SENTENCE_ENDINGS = (".", "؟", "!", "،")


def clean_arabic_translation(text: str) -> str:
    """Clean up and normalize an Arabic translation string.

    Steps, in order:

    1. Convert Latin ``?``, ``,`` and ``;`` to their Arabic counterparts.
       This runs before the strip step so the marks are converted instead
       of being deleted.
    2. Delete every character that is not a word character, whitespace,
       part of the Arabic Unicode block, or one of ``. ! :``.
    3. Collapse each whitespace run to a single space so the
       space-delimited substitutions below match reliably.
    4. Apply the fixed substitutions. ``ان`` and ``إلي`` are only rewritten
       at word boundaries so longer words are left intact.
    5. Trim the text and append ``.`` if it does not already end with
       ``.``, ``؟``, ``!`` or ``،``.

    Args:
        text: The translated text to clean.

    Returns:
        The cleaned text. Falsy values and non-string inputs are returned
        unchanged.
    """
    if not text or not isinstance(text, str):
        return text

    text = text.translate(_LATIN_TO_ARABIC_PUNCT)
    text = _DISALLOWED_CHARS.sub("", text)
    # Removing characters above can leave double spaces behind, so collapse
    # after stripping and before the space-delimited rules run.
    text = _WHITESPACE_RUN.sub(" ", text)

    for wrong, right in _PHRASE_REPLACEMENTS:
        text = text.replace(wrong, right)
    text = _WORD_FINAL_ILY.sub("إلى", text)
    text = _STANDALONE_AN.sub("أن", text)

    text = text.strip()
    if text and not text.endswith(_SENTENCE_ENDINGS):
        text += "."

    return text