Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -38,6 +38,40 @@ shortcut_map = {
|
|
| 38 |
"sxp": "saaxiib"
|
| 39 |
}
|
| 40 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
def number_to_words(number):
|
| 42 |
number = int(number)
|
| 43 |
if number < 20:
|
|
@@ -86,13 +120,19 @@ def normalize_text(text):
|
|
| 86 |
text = re.sub(r'(?i)(?<!\w)zamzam(?!\w)', 'samsam', text)
|
| 87 |
|
| 88 |
# ➤ Bedel shortcuts - eray kasta oo qoraalka ku jira beddel
|
| 89 |
-
# Ka dhig case-insensitive beddelka
|
| 90 |
def replace_shortcuts(match):
|
| 91 |
word = match.group(0).lower()
|
| 92 |
return shortcut_map.get(word, word)
|
| 93 |
pattern = re.compile(r'\b(' + '|'.join(re.escape(k) for k in shortcut_map.keys()) + r')\b', re.IGNORECASE)
|
| 94 |
text = pattern.sub(replace_shortcuts, text)
|
| 95 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 96 |
# ➤ Ka saar tirooyin leh koma iyo tobanle
|
| 97 |
text = re.sub(r'(\d{1,3})(,\d{3})+', lambda m: m.group(0).replace(",", ""), text)
|
| 98 |
text = re.sub(r'\.\d+', '', text)
|
|
|
|
| 38 |
"sxp": "saaxiib"
|
| 39 |
}
|
| 40 |
|
| 41 |
+
# Countries dictionary (English to Somali)
|
| 42 |
+
country_map = {
|
| 43 |
+
"somalia": "Soomaaliya",
|
| 44 |
+
"ethiopia": "Itoobiya",
|
| 45 |
+
"kenya": "Kenya",
|
| 46 |
+
"djibouti": "Jabuuti",
|
| 47 |
+
"sudan": "Suudaan",
|
| 48 |
+
"south sudan": "Koonfurta Suudaan",
|
| 49 |
+
"uganda": "Ugaandha",
|
| 50 |
+
"tanzania": "Tansaaniya",
|
| 51 |
+
"egypt": "Masar",
|
| 52 |
+
"libya": "Liibiya",
|
| 53 |
+
"algeria": "Aljeeriya",
|
| 54 |
+
"morocco": "Morooko",
|
| 55 |
+
"tunisia": "Tuniisiya",
|
| 56 |
+
"eritrea": "Eriteriya",
|
| 57 |
+
"malawi": "Malaawi",
|
| 58 |
+
"mozambique": "Mosambiik",
|
| 59 |
+
"zambia": "Sambiya",
|
| 60 |
+
"zimbabwe": "Simbabwe",
|
| 61 |
+
"niger": "Niyjer",
|
| 62 |
+
"nigeria": "Nayjeeriya",
|
| 63 |
+
"united states": "Maraykanka",
|
| 64 |
+
"china": "Shiinaha",
|
| 65 |
+
"india": "Hindiya",
|
| 66 |
+
"russia": "Ruushka",
|
| 67 |
+
"united kingdom": "Boqortooyada Midowday",
|
| 68 |
+
"germany": "Jarmalka",
|
| 69 |
+
"france": "Faransiiska",
|
| 70 |
+
"japan": "Jabaan",
|
| 71 |
+
"canada": "Kanada",
|
| 72 |
+
"australia": "Australia"
|
| 73 |
+
}
|
| 74 |
+
|
| 75 |
def number_to_words(number):
|
| 76 |
number = int(number)
|
| 77 |
if number < 20:
|
|
|
|
| 120 |
text = re.sub(r'(?i)(?<!\w)zamzam(?!\w)', 'samsam', text)
|
| 121 |
|
| 122 |
# ➤ Bedel shortcuts - eray kasta oo qoraalka ku jira beddel
|
|
|
|
| 123 |
def replace_shortcuts(match):
|
| 124 |
word = match.group(0).lower()
|
| 125 |
return shortcut_map.get(word, word)
|
| 126 |
pattern = re.compile(r'\b(' + '|'.join(re.escape(k) for k in shortcut_map.keys()) + r')\b', re.IGNORECASE)
|
| 127 |
text = pattern.sub(replace_shortcuts, text)
|
| 128 |
|
| 129 |
+
# ➤ Bedel magacyada waddamada
|
| 130 |
+
def replace_countries(match):
|
| 131 |
+
word = match.group(0).lower()
|
| 132 |
+
return country_map.get(word, word)
|
| 133 |
+
country_pattern = re.compile(r'\b(' + '|'.join(re.escape(k) for k in country_map.keys()) + r')\b', re.IGNORECASE)
|
| 134 |
+
text = country_pattern.sub(replace_countries, text)
|
| 135 |
+
|
| 136 |
# ➤ Ka saar tirooyin leh koma iyo tobanle
|
| 137 |
text = re.sub(r'(\d{1,3})(,\d{3})+', lambda m: m.group(0).replace(",", ""), text)
|
| 138 |
text = re.sub(r'\.\d+', '', text)
|