nexusbert committed on
Commit
102ad84
·
1 Parent(s): a955afe
Files changed (1) hide show
  1. app.py +108 -16
app.py CHANGED
@@ -189,26 +189,85 @@ def preprocess_audio_ffmpeg(audio_data: bytes, target_sr: int = 16000) -> np.nda
189
  logger.error(f"FFmpeg preprocessing failed: {e}")
190
  raise HTTPException(status_code=400, detail="Audio preprocessing failed. Ensure ffmpeg is installed.")
191
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
192
  def speech_to_text(audio_data: bytes) -> str:
193
  audio_array = preprocess_audio_ffmpeg(audio_data)
194
 
195
- igbo_result = _get_igbo_asr()
196
- if igbo_result[0] is not None and igbo_result[1] is not None:
197
- igbo_model, igbo_proc = igbo_result
198
- igbo_text = _run_whisper(igbo_model, igbo_proc, audio_array, language="igbo")
199
- if igbo_text and igbo_text.strip():
200
- logger.info("Using Igbo ASR result")
201
- return igbo_text
202
 
203
  mms_result = _get_mms()
204
  if mms_result and mms_result[0] is not None and mms_result[1] is not None:
205
  mms_model, mms_proc = mms_result
206
  mms_text = _run_mms(mms_model, mms_proc, audio_array)
207
- if mms_text and mms_text.strip():
208
- logger.info("Using MMS ASR result")
209
- return mms_text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
210
 
211
- return ""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
212
 
213
 
214
  def get_ai_response(text: str, response_language: str = None) -> str:
@@ -235,11 +294,26 @@ def get_ai_response(text: str, response_language: str = None) -> str:
235
 
236
  HAUSA_WORDS = [
237
  "aikin","manoma","gona","amfanin","yanayi","tsaba","fasaha","bisa","noman","shuka",
238
- "daji","rani","damina","amfani","bidi'a","noma","bashi","manure","tsiro","gishiri"
 
 
 
 
 
 
 
 
 
 
239
  ]
240
 
241
- YORUBA_WORDS = [
242
- "ilé","ọmọ","òun","awọn","agbẹ","oko","ọgbà","irugbin","àkọsílẹ","omi","ojo","àgbàlá","irọlẹ"
 
 
 
 
 
243
  ]
244
 
245
  IGBO_WORDS = [
@@ -249,6 +323,22 @@ IGBO_WORDS = [
249
  "ọrụ","ugbo","mmiri","ala","nri","ahụhụ","ọhụrụ","ncheta","akụkọ","ugwu"
250
  ]
251
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
252
  def detect_language(text: str) -> str:
253
  text_lower = text.lower()
254
  if any(word in text_lower for word in HAUSA_WORDS):
@@ -257,6 +347,8 @@ def detect_language(text: str) -> str:
257
  return "yo"
258
  elif any(word in text_lower for word in IGBO_WORDS):
259
  return "ig"
 
 
260
  lang = detect(text)
261
  if lang.startswith("ha"):
262
  return "ha"
@@ -273,8 +365,8 @@ def text_to_speech_file(text: str) -> str:
273
 
274
  supported_tts_languages = ["ha", "yo", "en"]
275
  if lang not in supported_tts_languages:
276
- logger.warning(f"Language '{lang}' not supported for TTS, falling back to English")
277
- lang = "en"
278
 
279
  global tts_ig
280
  if lang == "ha":
 
189
  logger.error(f"FFmpeg preprocessing failed: {e}")
190
  raise HTTPException(status_code=400, detail="Audio preprocessing failed. Ensure ffmpeg is installed.")
191
 
192
+ def _score_transcription_quality(text: str) -> float:
193
+ if not text or not text.strip():
194
+ return 0.0
195
+
196
+ text_lower = text.lower()
197
+ score = 0.0
198
+
199
+ if len(text.strip()) > 3:
200
+ score += 0.3
201
+
202
+ if any(char.isalpha() for char in text):
203
+ score += 0.2
204
+
205
+ if len(text.split()) > 1:
206
+ score += 0.2
207
+
208
+ if not any(char in text for char in "[]{}()"):
209
+ score += 0.1
210
+
211
+ if not text.endswith("..."):
212
+ score += 0.1
213
+
214
+ if len(text.strip()) > 10:
215
+ score += 0.1
216
+
217
+ return min(score, 1.0)
218
+
219
def speech_to_text(audio_data: bytes) -> str:
    """Transcribe audio with both ASR back-ends and return the better result.

    The audio is preprocessed once, then passed to the MMS model and to the
    Igbo Whisper model when each is available. If only one back-end yields
    text, that text is returned. If both do, the choice is made first by
    detected language (a transcript detected as Igbo wins over one that is
    not) and then by the heuristic quality score, where the Igbo transcript
    must beat the MMS transcript by more than 0.1 to be chosen.

    Args:
        audio_data: Raw audio bytes as received from the caller.

    Returns:
        The selected transcript, or ``""`` when neither back-end produced
        any non-blank text.

    Raises:
        Whatever ``preprocess_audio_ffmpeg`` raises is propagated unchanged.
    """
    audio_array = preprocess_audio_ffmpeg(audio_data)

    mms_text = ""
    igbo_text = ""

    mms_result = _get_mms()
    if mms_result and mms_result[0] is not None and mms_result[1] is not None:
        mms_model, mms_proc = mms_result
        mms_text = _run_mms(mms_model, mms_proc, audio_array)
        logger.info(f"MMS result: '{mms_text}'")

    igbo_result = _get_igbo_asr()
    # Same defensive guard as for MMS: tolerate a loader that returns None
    # instead of a (model, processor) pair.
    if igbo_result and igbo_result[0] is not None and igbo_result[1] is not None:
        igbo_model, igbo_proc = igbo_result
        igbo_text = _run_whisper(igbo_model, igbo_proc, audio_array, language="igbo")
        logger.info(f"Igbo ASR result: '{igbo_text}'")

    # Treat None and whitespace-only output as "no transcript" so a blank
    # result from one back-end cannot shadow real text from the other.
    if not (mms_text and mms_text.strip()):
        mms_text = ""
    if not (igbo_text and igbo_text.strip()):
        igbo_text = ""

    if not mms_text and not igbo_text:
        return ""

    if not mms_text:
        logger.info("Using Igbo ASR result (MMS failed)")
        return igbo_text

    if not igbo_text:
        logger.info("Using MMS ASR result (Igbo ASR failed)")
        return mms_text

    # Both back-ends produced text: compare them.
    mms_score = _score_transcription_quality(mms_text)
    igbo_score = _score_transcription_quality(igbo_text)

    mms_lang = detect_language(mms_text)
    igbo_lang = detect_language(igbo_text)

    logger.info(f"MMS: '{mms_text}' (score: {mms_score:.2f}, lang: {mms_lang})")
    logger.info(f"Igbo: '{igbo_text}' (score: {igbo_score:.2f}, lang: {igbo_lang})")

    # Language agreement takes precedence over the surface-quality score.
    if igbo_lang == "ig" and mms_lang != "ig":
        logger.info("Using Igbo ASR result (detected Igbo language)")
        return igbo_text

    if mms_lang == "ig" and igbo_lang != "ig":
        logger.info("Using MMS ASR result (Igbo ASR didn't detect Igbo)")
        return mms_text

    # MMS is the default; the Igbo model must win by a clear margin.
    if igbo_score > mms_score + 0.1:
        logger.info("Using Igbo ASR result (higher quality score)")
        return igbo_text

    logger.info("Using MMS ASR result (higher quality score)")
    return mms_text
271
 
272
 
273
  def get_ai_response(text: str, response_language: str = None) -> str:
 
294
 
295
# Keyword list used by detect_language to recognise Hausa text.
# Changes from the previous revision of this list:
#   * every entry now appears exactly once (repeats added nothing to an
#     `any(...)` membership scan);
#   * the English words "manure" and "sugar" were dropped. "manure" is also
#     listed in ENGLISH_WORDS, and because the Hausa check runs first, English
#     text containing it was being labelled Hausa.
# NOTE(review): detect_language tests `word in text_lower`, i.e. substring
# matching, so very short entries such as "da", "ko" or "me" also match inside
# unrelated words. Consider whole-word matching in detect_language.
HAUSA_WORDS = [
    "aikin", "manoma", "gona", "amfanin", "yanayi", "tsaba", "fasaha", "bisa", "noman", "shuka",
    "daji", "rani", "damina", "amfani", "bidi'a", "noma", "bashi", "tsiro", "gishiri",
    "kasa", "ruwa", "iska", "rana", "wata", "dare", "gari", "shinkafa", "wake", "gyada",
    "aljihu", "kudi", "kasuwa", "gida", "makaranta", "asibiti", "motar", "jirgi", "keke", "doki",
    "kare", "kaza", "rago", "tunkiya", "shanu", "kaji", "kifi",
    "abinci", "shayi", "kofi", "mai", "kayan", "miya", "tuwo",
    "fura", "koko", "kunu", "zobo",
    "yaro", "yarinya", "mutum", "mace", "yara", "mata", "maza", "dattijo", "tsoho", "sabon",
    "babba", "karami", "tsufa", "kyau", "mugun", "dadi", "daci",
    "ina", "wane", "me", "yaushe", "yaya", "dom", "saboda", "tare", "da", "kuma", "amma",
    "ko", "idan", "sai", "har", "tun", "kafin", "bayan", "cikin", "kan", "karkashin",
    "gaban", "tsakanin", "tsaye", "kwance", "hau", "sauka", "tashi", "zauna",
]
309
 
310
# Keyword list for recognising Yoruba text (presumably consulted by
# detect_language in the same way as the other *_WORDS lists - confirm there).
# Entries are kept in their original order.
YORUBA_WORDS = [
    "ọkọ", "aya", "baba", "iya", "egbon",
    "aburo", "omo", "ebi", "ọrẹ", "arakunrin",
    "ọjọ", "ọsẹ", "osu", "ọdun", "owuro",
    "ọsan", "alẹ", "oru", "ile", "ita",
    "oja", "aso", "ounje", "ata", "isu",
    "iresi", "ewa", "epo", "iyọ", "ọti",
    "nkan", "ti", "ni", "si", "lati",
    "ati", "pelu", "fun", "ninu", "lori",
    "abẹ", "iwaju", "ẹhin", "arin", "wọle",
    "jade", "lo", "wa", "je", "mu",
    "sọ", "ro", "kọ", "gbo", "ri",
    "mọ", "fẹ", "nifẹ", "fẹran", "dara",
    "buburu", "tobi", "kekere", "pupa", "funfun",
    "dudu", "pọ", "diẹ", "gbogbo", "kan",
]
318
 
319
  IGBO_WORDS = [
 
323
  "ọrụ","ugbo","mmiri","ala","nri","ahụhụ","ọhụrụ","ncheta","akụkọ","ugwu"
324
  ]
325
 
326
# Agriculture-related English keywords used by detect_language to label text
# as English ("en") after the Hausa/Yoruba/Igbo keyword checks have failed.
# Each entry appears exactly once; the earlier revision repeated "yield",
# "organic", "fertile" and "feed", which added nothing to a membership scan.
ENGLISH_WORDS = [
    "farm", "farmer", "farming", "agriculture", "crop", "crops", "plant", "plants", "seed", "seeds",
    "soil", "water", "rain", "sun", "weather", "harvest", "field", "fields", "tractor", "plow",
    "fertilizer", "pesticide", "organic", "yield", "yields", "grain", "wheat", "corn", "rice",
    "vegetable", "vegetables", "fruit", "fruits", "livestock", "cattle", "cow", "cows", "sheep",
    "goat", "goats", "chicken", "chickens", "pig", "pigs", "milk", "meat", "eggs", "feed",
    "barn", "silo", "greenhouse", "irrigation", "drought", "flood", "pest", "disease", "healthy",
    "growth", "mature", "ripe", "planting", "sowing", "weeding", "pruning", "spraying", "fertilizing",
    "cultivation", "cultivate", "cultivated", "arable", "fertile", "barren", "productive",
    "production", "produce", "grow", "growing", "grown", "planted", "harvested", "harvesting",
    "season", "seasons", "spring", "summer", "autumn", "winter", "climate", "temperature", "humidity",
    "moisture", "dry", "wet", "nutrients", "nutrition", "feeding", "grazing",
    "pasture", "meadow", "grass", "hay", "straw", "compost", "manure", "chemical",
    "sustainable", "sustainability", "biodiversity", "ecosystem", "environment", "conservation",
]
341
+
342
  def detect_language(text: str) -> str:
343
  text_lower = text.lower()
344
  if any(word in text_lower for word in HAUSA_WORDS):
 
347
  return "yo"
348
  elif any(word in text_lower for word in IGBO_WORDS):
349
  return "ig"
350
+ elif any(word in text_lower for word in ENGLISH_WORDS):
351
+ return "en"
352
  lang = detect(text)
353
  if lang.startswith("ha"):
354
  return "ha"
 
365
 
366
  supported_tts_languages = ["ha", "yo", "en"]
367
  if lang not in supported_tts_languages:
368
+ logger.warning(f"Language '{lang}' not supported for TTS, returning text response")
369
+ raise Exception(f"TTS not available for language '{lang}' - returning text response")
370
 
371
  global tts_ig
372
  if lang == "ha":