Mohansai2004 committed on
Commit
3931db8
·
verified ·
1 Parent(s): c580324

Update app/utils/model_scanner.py

Browse files
Files changed (1) hide show
  1. app/utils/model_scanner.py +66 -111
app/utils/model_scanner.py CHANGED
@@ -223,96 +223,10 @@ class ModelScanner:
223
  Returns:
224
  Dictionary of available TTS languages with voices
225
  """
226
- # ALWAYS return our 13 explicitly supported languages
227
- # English uses Coqui LJSpeech model, Indian languages use Piper TTS
228
- tts_models = {
229
- # English - Uses Coqui LJSpeech Tacotron2 model
230
- "en": {
231
- "name": "English",
232
- "model": "tts_models/en/ljspeech/tacotron2-DDC",
233
- "engine": "coqui",
234
- "voices": ["LJSpeech Tacotron2-DDC"]
235
- },
236
- # Indian Languages - All use Piper TTS for better quality
237
- "hi": {
238
- "name": "Hindi",
239
- "model": "piper/hi_IN-swarajya-medium",
240
- "engine": "piper",
241
- "voices": ["Swarajya Medium"]
242
- },
243
- "bn": {
244
- "name": "Bengali",
245
- "model": "piper/bn_BD-multi-medium",
246
- "engine": "piper",
247
- "voices": ["Multi Medium"]
248
- },
249
- "te": {
250
- "name": "Telugu",
251
- "model": "piper/te_IN-multi-medium",
252
- "engine": "piper",
253
- "voices": ["Multi Medium"]
254
- },
255
- "ta": {
256
- "name": "Tamil",
257
- "model": "piper/ta_IN-multi-medium",
258
- "engine": "piper",
259
- "voices": ["Multi Medium"]
260
- },
261
- "mr": {
262
- "name": "Marathi",
263
- "model": "piper/mr_IN-multi-medium",
264
- "engine": "piper",
265
- "voices": ["Multi Medium"]
266
- },
267
- "gu": {
268
- "name": "Gujarati",
269
- "model": "piper/gu_IN-multi-medium",
270
- "engine": "piper",
271
- "voices": ["Multi Medium"]
272
- },
273
- "kn": {
274
- "name": "Kannada",
275
- "model": "piper/kn_IN-multi-medium",
276
- "engine": "piper",
277
- "voices": ["Multi Medium"]
278
- },
279
- "ml": {
280
- "name": "Malayalam",
281
- "model": "piper/ml_IN-multi-medium",
282
- "engine": "piper",
283
- "voices": ["Multi Medium"]
284
- },
285
- "pa": {
286
- "name": "Punjabi",
287
- "model": "piper/pa_IN-multi-medium",
288
- "engine": "piper",
289
- "voices": ["Multi Medium"]
290
- },
291
- "ur": {
292
- "name": "Urdu",
293
- "model": "piper/ur_PK-multi-medium",
294
- "engine": "piper",
295
- "voices": ["Multi Medium"]
296
- },
297
- "as": {
298
- "name": "Assamese",
299
- "model": "piper/as_IN-multi-medium",
300
- "engine": "piper",
301
- "voices": ["Multi-speaker Medium (Limited)"]
302
- },
303
- "or": {
304
- "name": "Odia",
305
- "model": "piper/or-multi_multispeaker-medium",
306
- "engine": "piper",
307
- "voices": ["Multi-speaker Medium (Limited)"]
308
- },
309
- }
310
 
311
- logger.info("tts_languages_available",
312
- count=len(tts_models),
313
- languages=list(tts_models.keys()))
314
 
315
- # Optionally scan for additional models (but don't override our 13 core languages)
316
  try:
317
  # Disable numba cache to avoid librosa caching errors
318
  import os
@@ -326,10 +240,11 @@ class ModelScanner:
326
  # Get list of available models using ModelManager
327
  manager = ModelManager()
328
  available_models = manager.list_models()
329
- logger.info("tts_coqui_models_scanned", count=len(available_models))
330
 
331
- # Language name mappings for additional languages
332
- additional_language_names = {
 
333
  "es": "Spanish",
334
  "fr": "French",
335
  "de": "German",
@@ -342,46 +257,86 @@ class ModelScanner:
342
  "cs": "Czech",
343
  "ar": "Arabic",
344
  "zh": "Chinese",
345
- "zh-CN": "Chinese (Simplified)",
346
  "ja": "Japanese",
347
  "ko": "Korean",
348
  "hu": "Hungarian",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
349
  }
350
 
351
- # Extract language codes from model names (for additional languages only)
352
  lang_voices: Dict[str, List[str]] = {}
353
  for model in available_models:
 
354
  parts = model.split("/")
355
  if len(parts) >= 2 and parts[0] == "tts_models":
356
  lang_code = parts[1]
357
- # Only add if not already in our core 13 languages
358
- if lang_code not in tts_models:
359
- if lang_code not in lang_voices:
360
- lang_voices[lang_code] = []
361
- lang_voices[lang_code].append(model)
362
 
363
- # Add additional discovered languages
364
  for lang_code, voices in lang_voices.items():
365
- if lang_code not in tts_models: # Don't override core languages
366
- tts_models[lang_code] = {
367
- "name": additional_language_names.get(lang_code, lang_code.upper()),
368
- "voices": voices[:5]
369
- }
370
- logger.info("found_additional_tts_language", language=lang_code, voice_count=len(voices))
371
 
372
  except ImportError:
373
- logger.warning("coqui_tts_not_available", message="Coqui TTS not installed, using core 13 languages")
374
- # Our core 13 languages are already defined above
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
375
  except RuntimeError as e:
376
  # Handle numba caching errors
377
  if "cannot cache function" in str(e):
378
- logger.warning("tts_numba_caching_error", error=str(e), message="Using core 13 languages")
 
 
379
  else:
380
- logger.warning("tts_scan_error", error=str(e), message="Using core 13 languages")
381
  except Exception as e:
382
- logger.warning("error_scanning_additional_tts_models", error=str(e), message="Using core 13 languages")
 
383
 
384
- logger.info("tts_scan_complete", total_languages=len(tts_models))
385
  return tts_models
386
 
387
  @staticmethod
 
223
  Returns:
224
  Dictionary of available TTS languages with voices
225
  """
226
+ tts_models = {}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
227
 
228
+ logger.info("scanning_tts_models")
 
 
229
 
 
230
  try:
231
  # Disable numba cache to avoid librosa caching errors
232
  import os
 
240
  # Get list of available models using ModelManager
241
  manager = ModelManager()
242
  available_models = manager.list_models()
243
+ logger.info("tts_available_models", count=len(available_models))
244
 
245
+ # Language name mappings - includes English and all major Indian languages
246
+ language_names = {
247
+ "en": "English",
248
  "es": "Spanish",
249
  "fr": "French",
250
  "de": "German",
 
257
  "cs": "Czech",
258
  "ar": "Arabic",
259
  "zh": "Chinese",
 
260
  "ja": "Japanese",
261
  "ko": "Korean",
262
  "hu": "Hungarian",
263
+ # Indian Languages
264
+ "hi": "Hindi",
265
+ "hin": "Hindi",
266
+ "bn": "Bengali",
267
+ "ben": "Bengali",
268
+ "te": "Telugu",
269
+ "tel": "Telugu",
270
+ "ta": "Tamil",
271
+ "tam": "Tamil",
272
+ "mr": "Marathi",
273
+ "mar": "Marathi",
274
+ "gu": "Gujarati",
275
+ "guj": "Gujarati",
276
+ "kn": "Kannada",
277
+ "kan": "Kannada",
278
+ "ml": "Malayalam",
279
+ "mal": "Malayalam",
280
+ "pa": "Punjabi",
281
+ "pan": "Punjabi",
282
+ "ur": "Urdu",
283
+ "urd": "Urdu",
284
+ "as": "Assamese",
285
+ "asm": "Assamese",
286
+ "or": "Odia",
287
+ "ory": "Odia",
288
  }
289
 
290
+ # Extract language codes from model names
291
  lang_voices: Dict[str, List[str]] = {}
292
  for model in available_models:
293
+ # Extract language code from model name (e.g., "tts_models/en/ljspeech/...")
294
  parts = model.split("/")
295
  if len(parts) >= 2 and parts[0] == "tts_models":
296
  lang_code = parts[1]
297
+ if lang_code not in lang_voices:
298
+ lang_voices[lang_code] = []
299
+ lang_voices[lang_code].append(model)
 
 
300
 
301
+ # Build TTS language dictionary
302
  for lang_code, voices in lang_voices.items():
303
+ tts_models[lang_code] = {
304
+ "name": language_names.get(lang_code, lang_code.upper()),
305
+ "voices": voices[:5] # Limit to first 5 voices
306
+ }
307
+ logger.info("found_tts_language", language=lang_code, voice_count=len(voices))
 
308
 
309
  except ImportError:
310
+ logger.warning("coqui_tts_not_available")
311
+ # Fallback: check for downloaded models in filesystem
312
+ tts_path = Path(settings.coqui_model_path)
313
+ logger.info("checking_tts_filesystem", path=str(tts_path), exists=tts_path.exists())
314
+
315
+ if tts_path.exists():
316
+ try:
317
+ for item in tts_path.iterdir():
318
+ if item.is_dir():
319
+ lang_code = item.name.split("_")[0] if "_" in item.name else item.name[:2]
320
+ tts_models[lang_code] = {
321
+ "name": lang_code.upper(),
322
+ "voices": [item.name]
323
+ }
324
+ logger.info("found_tts_model_filesystem", language=lang_code, model=item.name)
325
+ except Exception as e:
326
+ logger.error("error_reading_tts_directory", error=str(e))
327
  except RuntimeError as e:
328
  # Handle numba caching errors
329
  if "cannot cache function" in str(e):
330
+ logger.warning("tts_numba_caching_error_using_fallback", error=str(e))
331
+ # Return empty dict - models need to be downloaded manually
332
+ return {}
333
  else:
334
+ raise
335
  except Exception as e:
336
+ logger.error("error_scanning_tts_models", error=str(e), exc_info=True)
337
+ return {}
338
 
339
+ logger.info("tts_scan_complete", models_found=len(tts_models))
340
  return tts_models
341
 
342
  @staticmethod