NLPV committed on
Commit
a960ed3
·
verified ·
1 Parent(s): f363328

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -15
app.py CHANGED
@@ -1,42 +1,48 @@
1
  import os
2
- import gradio as gr
3
  import tempfile
 
4
  import librosa
5
  import soundfile as sf
6
 
7
- # ========== Trust Required Configs for PyTorch 2.6+ ==========
8
  from torch.serialization import add_safe_globals
9
  from TTS.tts.configs.xtts_config import XttsConfig
10
- from TTS.tts.models.xtts import XttsAudioConfig
11
  from TTS.config.shared_configs import BaseDatasetConfig
12
 
13
- add_safe_globals([XttsConfig, XttsAudioConfig, BaseDatasetConfig])
 
 
 
 
 
14
 
15
- # ========== Agree to Coqui TTS Terms ==========
16
  os.environ["COQUI_TOS_AGREED"] = "1"
17
 
18
- # ========== Initialize the Coqui TTS Model ==========
19
  from TTS.api import TTS
 
20
  tts = TTS(
21
  model_name="tts_models/multilingual/multi-dataset/xtts_v2",
22
  progress_bar=True,
23
- gpu=False # Set to True if using a CUDA-compatible GPU
24
  )
25
 
26
- # ========== Inference Function ==========
27
  def text_to_speech_clone(text, voice_sample):
28
  if voice_sample is None:
29
  return "Please provide a voice sample audio.", None
30
 
31
- # Load voice sample
32
  sample_wav, sample_rate = librosa.load(voice_sample, sr=22050)
33
 
34
- # Save sample to temp file
35
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_sample:
36
  sf.write(tmp_sample.name, sample_wav, sample_rate)
37
  voice_sample_path = tmp_sample.name
38
 
39
- # Generate cloned Hindi speech
40
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_output:
41
  tts.tts_to_file(
42
  text=text,
@@ -48,7 +54,7 @@ def text_to_speech_clone(text, voice_sample):
48
 
49
  return output_path
50
 
51
- # ========== Gradio Interface ==========
52
  iface = gr.Interface(
53
  fn=text_to_speech_clone,
54
  inputs=[
@@ -58,10 +64,10 @@ iface = gr.Interface(
58
  outputs=gr.Audio(type="filepath", label="Generated Cloned Speech"),
59
  title="Hindi Text-to-Speech with Voice Cloning",
60
  description=(
61
- "Generate Hindi speech from text with voice cloning capability.\n"
62
- "Provide a short Hindi voice sample (5-10 seconds) to clone its voice tone."
63
  )
64
  )
65
 
66
- # ========== Launch ==========
67
  iface.launch()
 
1
  import os
 
2
  import tempfile
3
+ import gradio as gr
4
  import librosa
5
  import soundfile as sf
6
 
7
+ # ===== Step 1: Allowlist Required Classes for PyTorch >= 2.6 =====
8
  from torch.serialization import add_safe_globals
9
  from TTS.tts.configs.xtts_config import XttsConfig
10
+ from TTS.tts.models.xtts import XttsAudioConfig, XttsArgs
11
  from TTS.config.shared_configs import BaseDatasetConfig
12
 
13
+ add_safe_globals([
14
+ XttsConfig,
15
+ XttsAudioConfig,
16
+ XttsArgs,
17
+ BaseDatasetConfig
18
+ ])
19
 
20
+ # ===== Step 2: Agree to Coqui TTS Terms of Service =====
21
  os.environ["COQUI_TOS_AGREED"] = "1"
22
 
23
+ # ===== Step 3: Load the Coqui XTTS Model =====
24
  from TTS.api import TTS
25
+
26
  tts = TTS(
27
  model_name="tts_models/multilingual/multi-dataset/xtts_v2",
28
  progress_bar=True,
29
+ gpu=False # Set to True if using CUDA
30
  )
31
 
32
+ # ===== Step 4: Define Voice Cloning Inference Function =====
33
  def text_to_speech_clone(text, voice_sample):
34
  if voice_sample is None:
35
  return "Please provide a voice sample audio.", None
36
 
37
+ # Load the voice sample audio file
38
  sample_wav, sample_rate = librosa.load(voice_sample, sr=22050)
39
 
40
+ # Save sample temporarily in correct format
41
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_sample:
42
  sf.write(tmp_sample.name, sample_wav, sample_rate)
43
  voice_sample_path = tmp_sample.name
44
 
45
+ # Generate cloned Hindi speech and save it to a temp file
46
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_output:
47
  tts.tts_to_file(
48
  text=text,
 
54
 
55
  return output_path
56
 
57
+ # ===== Step 5: Gradio UI Interface =====
58
  iface = gr.Interface(
59
  fn=text_to_speech_clone,
60
  inputs=[
 
64
  outputs=gr.Audio(type="filepath", label="Generated Cloned Speech"),
65
  title="Hindi Text-to-Speech with Voice Cloning",
66
  description=(
67
+ "यह ऐप हिंदी टेक्स्ट से वॉयस क्लोनिंग के साथ स्पीच जेनरेट करता है।\n"
68
+ "एक छोटी सी हिंदी आवाज़ की रिकॉर्डिंग (5-10 सेकंड) अपलोड करें, और यह उसी आवाज़ में टेक्स्ट पढ़कर सुनाएगा।"
69
  )
70
  )
71
 
72
+ # ===== Step 6: Launch the Web App =====
73
  iface.launch()