Mohansai2004 committed on
Commit
b984bd8
·
verified ·
1 Parent(s): 9ca7cf8

Update scripts/download_models.py

Browse files
Files changed (1) hide show
  1. scripts/download_models.py +23 -115
scripts/download_models.py CHANGED
@@ -356,107 +356,10 @@ async def install_argos_packages():
356
 
357
 
358
 
359
- async def download_piper_models():
360
- """Download Piper TTS voice models for Indian languages."""
361
- print("\n" + "="*60)
362
- print("Piper TTS Voice Models")
363
- print("="*60)
364
-
365
- # Always use /tmp path to avoid permission issues in Docker
366
- piper_dir = Path("/tmp") / ".local" / "share" / "piper" / "voices"
367
- piper_dir.mkdir(parents=True, exist_ok=True)
368
- print(f"Using Piper models path: {piper_dir}")
369
-
370
- # Piper voice models to download
371
- # Using direct GitHub release URLs for reliability
372
- piper_models = {
373
- "hi_IN": {
374
- "display_name": "Hindi",
375
- "model": "hi_IN-swarajya-medium",
376
- "url": "https://github.com/rhasspy/piper/releases/download/2023.11.14-2/voice-hi-in-swarajya-medium.tar.gz",
377
- "config_url": None # Config included in tar.gz
378
- },
379
- "te_IN": {
380
- "display_name": "Telugu",
381
- "model": "te_IN-multi-medium",
382
- "url": "https://github.com/rhasspy/piper/releases/download/2023.11.14-2/voice-te-in-multi-medium.tar.gz",
383
- "config_url": None # Config included in tar.gz
384
- },
385
- "ta_IN": {
386
- "display_name": "Tamil",
387
- "model": "ta_IN-multi-medium",
388
- "url": "https://github.com/rhasspy/piper/releases/download/2023.11.14-2/voice-ta-in-multi-medium.tar.gz",
389
- "config_url": None # Config included in tar.gz
390
- },
391
- }
392
-
393
- # Prepare download list
394
- downloads = []
395
- skipped = []
396
-
397
- for lang_code, model_info in piper_models.items():
398
- model_dir = piper_dir / model_info['model']
399
- model_dir.mkdir(parents=True, exist_ok=True)
400
-
401
- model_file = model_dir / f"{model_info['model']}.onnx"
402
- config_file = model_dir / f"{model_info['model']}.onnx.json"
403
-
404
- if model_file.exists() and config_file.exists():
405
- skipped.append(model_info['display_name'])
406
- continue
407
-
408
- # Add tar.gz download
409
- if not model_file.exists():
410
- tar_path = piper_dir / f"{model_info['model']}.tar.gz"
411
- downloads.append((
412
- model_info['url'],
413
- tar_path,
414
- f"Piper {model_info['display_name']}"
415
- ))
416
-
417
- if skipped:
418
- print(f"\nSkipping existing models: {', '.join(skipped)}")
419
-
420
- if not downloads:
421
- print("\n✓ All Piper models already downloaded!")
422
- return
423
-
424
- print(f"\nDownloading {len(downloads)} files...")
425
-
426
- # Download all models
427
- async with ParallelDownloader(max_concurrent=3) as downloader:
428
- results = await downloader.download_multiple(downloads)
429
-
430
- # Extract tar.gz files
431
- print("\n\nExtracting models...")
432
- import tarfile
433
-
434
- for lang_code, model_info in piper_models.items():
435
- tar_path = piper_dir / f"{model_info['model']}.tar.gz"
436
- model_dir = piper_dir / model_info['model']
437
-
438
- if tar_path.exists():
439
- try:
440
- print(f" Extracting {model_info['display_name']}...")
441
- with tarfile.open(tar_path, 'r:gz') as tar:
442
- tar.extractall(model_dir)
443
-
444
- # Clean up tar file
445
- tar_path.unlink()
446
- print(f" ✓ {model_info['display_name']} extracted")
447
- except Exception as e:
448
- print(f" ✗ {model_info['display_name']}: {str(e)[:50]}")
449
-
450
- print("\n✓ Piper model download complete!")
451
- print("\n📋 Downloaded Piper voices for:")
452
- for lang_code, model_info in piper_models.items():
453
- print(f" • {model_info['display_name']} ({lang_code})")
454
-
455
-
456
  def setup_tts_models():
457
- """Setup TTS models (Coqui TTS for English)."""
458
  print("\n" + "="*60)
459
- print("Coqui TTS Model Setup")
460
  print("="*60)
461
 
462
  # Create TTS directory
@@ -473,17 +376,23 @@ def setup_tts_models():
473
  print(" Or: pip install -r requirements.txt\n")
474
  return
475
 
476
- print("\n📋 Coqui TTS Languages:")
477
- print(" • English (en) - LJSpeech Tacotron2-DDC")
478
- print(" • European languages (es, fr, de, it, pt, etc.)")
479
-
480
- print("\n📋 Piper TTS Languages (12 Indian languages):")
481
- print(" • Hindi, Bengali, Tamil, Telugu")
482
- print(" • Kannada, Malayalam, Marathi, Gujarati")
483
- print(" • Punjabi, Urdu, Assamese, Odia")
484
-
485
- print("\n✓ Coqui TTS models auto-download on first use")
486
- print("✓ Piper TTS models downloaded via download_piper_models()")
 
 
 
 
 
 
487
  print()
488
 
489
 
@@ -508,7 +417,6 @@ async def main():
508
  try:
509
  await download_vosk_models()
510
  await install_argos_packages()
511
- await download_piper_models()
512
  setup_tts_models()
513
 
514
  elapsed = time.time() - start_time
@@ -527,10 +435,10 @@ async def main():
527
  print(" ✓ English ↔ Hindi")
528
  print(" ✓ English ↔ Bengali")
529
  print("\n TTS (Text-to-Speech): 13 languages")
530
- print(" ✓ English (Coqui LJSpeech Tacotron2)")
531
- print(" ✓ Hindi, Bengali, Telugu, Tamil (Piper TTS)")
532
- print(" ✓ Marathi, Gujarati, Kannada, Malayalam (Piper TTS)")
533
- print(" ✓ Punjabi, Urdu, Assamese, Odia (Piper TTS)")
534
 
535
  print("\n" + "="*70)
536
  print("\n🚀 Ready to Start!")
 
356
 
357
 
358
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
359
  def setup_tts_models():
360
+ """Setup TTS models (Coqui TTS downloads on first use)."""
361
  print("\n" + "="*60)
362
+ print("TTS Model Setup")
363
  print("="*60)
364
 
365
  # Create TTS directory
 
376
  print(" Or: pip install -r requirements.txt\n")
377
  return
378
 
379
+ print("\n📋 Supported TTS Languages:")
380
+ print(" • English (en) - XTTS v2")
381
+ print(" • Hindi (hi), Bengali (bn), Telugu (te), Tamil (ta)")
382
+ print(" • Marathi (mr), Gujarati (gu), Kannada (kn), Malayalam (ml)")
383
+ print(" • Punjabi (pa), Urdu (ur), Assamese (as), Odia (or)")
384
+ print(" • Plus 15 more European/Asian languages via XTTS v2")
385
+
386
+ print("\n✓ TTS models auto-download on first use (no pre-download needed)")
387
+
388
+ # Optional: Pre-download commonly used models
389
+ print("\n💡 To pre-download models, use Python:")
390
+ print(" from TTS.api import TTS")
391
+ print(" # For English/European languages:")
392
+ print(" TTS('tts_models/multilingual/multi-dataset/xtts_v2')")
393
+ print(" # For Indian languages (example):")
394
+ print(" TTS('tts_models/hin/fairseq/vits') # Hindi")
395
+ print(" TTS('tts_models/ben/fairseq/vits') # Bengali")
396
  print()
397
 
398
 
 
417
  try:
418
  await download_vosk_models()
419
  await install_argos_packages()
 
420
  setup_tts_models()
421
 
422
  elapsed = time.time() - start_time
 
435
  print(" ✓ English ↔ Hindi")
436
  print(" ✓ English ↔ Bengali")
437
  print("\n TTS (Text-to-Speech): 13 languages")
438
+ print(" ✓ English")
439
+ print(" ✓ Hindi, Bengali, Telugu, Tamil")
440
+ print(" ✓ Marathi, Gujarati, Kannada, Malayalam")
441
+ print(" ✓ Punjabi, Urdu, Assamese, Odia")
442
 
443
  print("\n" + "="*70)
444
  print("\n🚀 Ready to Start!")