rasenganai committed on
Commit
2b115c1
·
verified ·
1 Parent(s): a60b21f

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +51 -4
README.md CHANGED
@@ -31,11 +31,58 @@ We are also open sourcing the ability to finetune on your own voice.
31
 
32
 
33
 
34
- <!-- ## Installation -->
35
- <!--
 
 
 
 
 
 
36
  ```bash
37
- pip install git+https://github.com/dubverse-ai/MahaTTSv2.git
38
- ``` -->
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
 
40
 
41
  ### Model Params
 
31
 
32
 
33
 
34
+ ## Installation
35
+
36
+ ```bash
37
+ git lfs install
38
+ git clone --recurse-submodules https://huggingface.co/Dubverse/MahaTTSv2
39
+ pip install -r MahaTTSv2/requirements.txt
40
+ ```
41
+
42
  ```python
43
+ import sys
44
+ sys.path.append("MahaTTSv2/")
45
+ import os
46
+ import torch
47
+ import subprocess
48
+ from inference import infer, prepare_inputs, load_t2s_model, load_cfm, create_wav_header
49
+
50
+ device = "cuda"# if torch.cuda.is_available() else "cpu"
51
+ print("Using device", device)
52
+
53
+ # Model checkpoints
54
+ m1_checkpoint = "MahaTTSv2/pretrained_checkpoint/m1_gemma_benchmark_1_latest_weights.pt"
55
+ m2_checkpoint = "MahaTTSv2/pretrained_checkpoint/m2.pt"
56
+ vocoder_checkpoint = 'MahaTTSv2/pretrained_checkpoint/700_580k_multilingual_infer_ready/'
57
+
58
+ global FM, vocoder, m2, mu, std, m1
59
+
60
+ # Load models
61
+ FM, vocoder, m2, mu, std = load_cfm(m2_checkpoint, vocoder_checkpoint, device)
62
+ m1 = load_t2s_model(m1_checkpoint, device)
63
+
64
+
65
+ def generate_audio(text, language):
66
+
67
+ ref_clips = [
68
+ 'speakers/female1/train_hindifemale_02794.wav',
69
+ 'speakers/female1/train_hindifemale_04167.wav',
70
+ 'speakers/female1/train_hindifemale_02795.wav'
71
+ ]
72
+
73
+ text_ids, code_ids, language_code, ref_mels_m1, ref_mels_m2 = prepare_inputs(
74
+ text.lower(),
75
+ ref_clips_m1=ref_clips,
76
+ ref_clips_m2=ref_clips,
77
+ language=language,
78
+ device=device
79
+ )
80
+
81
+ audio_wav = infer(m1, m2, vocoder, FM, mu, std, text_ids, code_ids, language_code, ref_mels_m1, ref_mels_m2, device)
82
+ return 24000,audio_wav
83
+
84
+ ```
85
+
86
 
87
 
88
  ### Model Params