Spaces:
Sleeping
Sleeping
keywords
Browse files
app.py
CHANGED
|
@@ -329,13 +329,10 @@ def get_ai_response(text: str) -> str:
|
|
| 329 |
HAUSA_WORDS = [
|
| 330 |
"aikin", "manoma", "gona", "amfanin", "yanayi", "tsaba", "fasaha", "bisa", "noman", "shuka",
|
| 331 |
"daji", "rani", "damina", "amfani", "bidi'a", "noma", "bashi", "manure", "tsiro", "gishiri",
|
| 332 |
-
"
|
| 333 |
-
"
|
| 334 |
-
"
|
| 335 |
-
"
|
| 336 |
-
"kowa", "kome", "koyaushe", "koyaushe", "koyaushe", "koyaushe", "koyaushe", "koyaushe",
|
| 337 |
-
"saboda", "domin", "don", "saboda", "domin", "don", "saboda", "domin", "don",
|
| 338 |
-
"kuma", "har", "kadan", "sosai", "daidai", "kwata", "kwata", "kwata"
|
| 339 |
]
|
| 340 |
|
| 341 |
YORUBA_WORDS = [
|
|
@@ -353,14 +350,18 @@ YORUBA_WORDS = [
|
|
| 353 |
|
| 354 |
IGBO_WORDS = [
|
| 355 |
"ugbo", "akụkọ", "mmiri", "ala", "ọrụ", "ncheta", "ọhụrụ", "ugwu", "nri", "ahụhụ",
|
| 356 |
-
"
|
| 357 |
-
"
|
| 358 |
-
"
|
| 359 |
-
"
|
| 360 |
-
|
| 361 |
-
|
| 362 |
-
|
| 363 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 364 |
]
|
| 365 |
|
| 366 |
def detect_language_keywords(text: str) -> str:
|
|
@@ -372,16 +373,17 @@ def detect_language_keywords(text: str) -> str:
|
|
| 372 |
hausa_count = sum(1 for word in HAUSA_WORDS if word in text_lower)
|
| 373 |
yoruba_count = sum(1 for word in YORUBA_WORDS if word in text_lower)
|
| 374 |
igbo_count = sum(1 for word in IGBO_WORDS if word in text_lower)
|
|
|
|
| 375 |
|
| 376 |
-
logger.info(f"Language detection scores - Hausa: {hausa_count}, Yoruba: {yoruba_count}, Igbo: {igbo_count}")
|
| 377 |
|
| 378 |
-
if hausa_count > yoruba_count and hausa_count > igbo_count:
|
| 379 |
logger.info("Keyword detection: Hausa")
|
| 380 |
return "ha"
|
| 381 |
-
elif yoruba_count > hausa_count and yoruba_count > igbo_count:
|
| 382 |
logger.info("Keyword detection: Yoruba")
|
| 383 |
return "yo"
|
| 384 |
-
elif igbo_count > hausa_count and igbo_count > yoruba_count:
|
| 385 |
logger.info("Keyword detection: Igbo")
|
| 386 |
return "ig"
|
| 387 |
else:
|
|
|
|
| 329 |
# Hausa keywords used by detect_language_keywords(), which adds one point to
# the Hausa score for every entry of this list found in the lower-cased input.
# Each entry must appear exactly once: a repeated entry would be counted again
# for the same piece of text and bias the result toward Hausa.
HAUSA_WORDS = [
    "aikin", "manoma", "gona", "amfanin", "yanayi", "tsaba", "fasaha", "bisa", "noman", "shuka",
    # NOTE(review): "manure" looks like an English word in the Hausa list -- confirm it is intended.
    "daji", "rani", "damina", "amfani", "bidi'a", "noma", "bashi", "manure", "tsiro", "gishiri",
    "gonaki", "gonar", "tsirrai", "kayan",
]
|
| 337 |
|
| 338 |
YORUBA_WORDS = [
|
|
|
|
| 350 |
|
| 351 |
# Igbo keywords used by detect_language_keywords(), which adds one point to
# the Igbo score for every entry of this list found in the lower-cased input.
# Each entry must appear exactly once so that no word is counted twice.
IGBO_WORDS = [
    "ugbo", "akụkọ", "mmiri", "ala", "ọrụ", "ncheta", "ọhụrụ", "ugwu", "nri", "ahụhụ",
    "ọkụkọ", "ewu", "atụrụ", "ehi", "azụ", "osisi", "mkpụrụ", "ubi", "ọka", "ji",
    "akwụkwọ", "ofe", "azu", "anụ", "nnu", "mmanụ", "ngwọ", "ọgwụ", "ahịhịa",
    # NOTE(review): the entries below are very short. The scorer tests them with
    # `word in text_lower`; if that is plain substring containment, tokens such
    # as "ha", "ka", "ma" or a single letter will also match inside unrelated
    # words of other languages -- confirm this is acceptable.
    "n'", "maka", "n'ihi", "n'ime", "n'elu", "n'okpuru", "ya", "anyị", "unu", "ha",
    "otu", "ọtụtụ", "ebe", "oge", "ụ", "ọ", "ị", "bụ", "nọ", "ga", "dị", "ka", "ma",
]
|
| 358 |
+
|
| 359 |
+
# English keywords used by detect_language_keywords(), which adds one point to
# the English score for every entry of this list found in the lower-cased
# input. Hausa, Yoruba and Igbo are only reported when their score is strictly
# greater than this one.
ENGLISH_WORDS = [
    "farm", "farming", "agriculture", "crop", "crops", "plant", "plants", "seed", "seeds", "soil",
    "water", "rain", "weather", "harvest", "yield", "field", "fields", "farmer", "farmers", "grow",
    "growing", "fertilizer", "pesticide", "irrigation", "livestock", "cattle", "chicken", "goat", "sheep",
    "maize", "corn", "rice", "wheat", "vegetable", "vegetables", "fruit", "fruits", "tree", "trees",
    "cultivate", "cultivation", "plow", "plowing", "sow", "sowing", "reap", "reaping", "season", "seasons",
]
|
| 366 |
|
| 367 |
def detect_language_keywords(text: str) -> str:
|
|
|
|
| 373 |
hausa_count = sum(1 for word in HAUSA_WORDS if word in text_lower)
|
| 374 |
yoruba_count = sum(1 for word in YORUBA_WORDS if word in text_lower)
|
| 375 |
igbo_count = sum(1 for word in IGBO_WORDS if word in text_lower)
|
| 376 |
+
english_count = sum(1 for word in ENGLISH_WORDS if word in text_lower)
|
| 377 |
|
| 378 |
+
logger.info(f"Language detection scores - Hausa: {hausa_count}, Yoruba: {yoruba_count}, Igbo: {igbo_count}, English: {english_count}")
|
| 379 |
|
| 380 |
+
if hausa_count > yoruba_count and hausa_count > igbo_count and hausa_count > english_count:
|
| 381 |
logger.info("Keyword detection: Hausa")
|
| 382 |
return "ha"
|
| 383 |
+
elif yoruba_count > hausa_count and yoruba_count > igbo_count and yoruba_count > english_count:
|
| 384 |
logger.info("Keyword detection: Yoruba")
|
| 385 |
return "yo"
|
| 386 |
+
elif igbo_count > hausa_count and igbo_count > yoruba_count and igbo_count > english_count:
|
| 387 |
logger.info("Keyword detection: Igbo")
|
| 388 |
return "ig"
|
| 389 |
else:
|