umarabbas890 committed on
Commit
a2ae3c3
·
verified ·
1 Parent(s): 2acb2e9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -31
app.py CHANGED
@@ -1,9 +1,7 @@
1
  import os
2
- os.environ["COQUI_TOS_AGREED"] = "1" # Automatically accept Coqui ToS
3
 
4
  import torch.serialization
5
-
6
- # Add required Coqui XTTS classes to the trusted list
7
  torch.serialization.add_safe_globals([
8
  __import__("TTS.tts.configs.xtts_config").tts.configs.xtts_config.XttsConfig,
9
  __import__("TTS.tts.models.xtts").tts.models.xtts.XttsAudioConfig,
@@ -11,59 +9,53 @@ torch.serialization.add_safe_globals([
11
  __import__("TTS.config.shared_configs").config.shared_configs.BaseDatasetConfig
12
  ])
13
 
14
-
15
  import gradio as gr
16
  import torch
17
  import torchaudio
18
  from TTS.api import TTS
 
19
  import uuid
20
 
21
-
22
- # 🛠️ PyTorch 2.6+ fix for loading XTTS
23
- import torch.serialization
24
- torch.serialization.add_safe_globals([__import__("TTS.tts.configs.xtts_config").tts.configs.xtts_config.XttsConfig])
25
-
26
- # Load the XTTS model
27
  model_name = "tts_models/multilingual/multi-dataset/xtts_v2"
28
  tts = TTS(model_name=model_name, progress_bar=False, gpu=False)
29
 
30
-
31
- # Emotions mapped to style embeddings
32
- emotion_styles = {
33
- "Neutral": "neutral",
34
- "Sad": "sad",
35
- "Happy": "happy",
36
- "Angry": "angry",
37
- "Excited": "excited"
38
  }
39
 
40
- # Generate voice and save as MP3
41
- import os
42
-
43
- def generate_voice(text, speaker_audio_path):
44
  if not os.path.isfile(speaker_audio_path):
45
  raise FileNotFoundError(f"Speaker audio file not found: {speaker_audio_path}")
46
 
 
 
 
 
 
47
  tts.tts_to_file(
48
  text=text,
49
  speaker_wav=speaker_audio_path,
50
  language="en",
51
- file_path="output.wav"
52
  )
53
- return "output.wav"
54
-
55
-
56
 
57
- # Convert to MP3 using pydub
58
- mp3_path = output_path.replace(".wav", ".mp3")
59
- sound = AudioSegment.from_wav(output_path)
60
  sound.export(mp3_path, format="mp3")
61
 
62
  return mp3_path, mp3_path
63
 
64
- # Gradio Interface
65
  with gr.Blocks() as demo:
66
- gr.Markdown("## 🎙️ AI Voiceover Generator with Emotion Control\nConvert your script into a voiceover with the tone you choose!")
67
 
68
  with gr.Row():
69
  script_input = gr.Textbox(label="Enter Your Script", lines=5, placeholder="Type your video script here...")
 
1
  import os
2
+ os.environ["COQUI_TOS_AGREED"] = "1"
3
 
4
  import torch.serialization
 
 
5
  torch.serialization.add_safe_globals([
6
  __import__("TTS.tts.configs.xtts_config").tts.configs.xtts_config.XttsConfig,
7
  __import__("TTS.tts.models.xtts").tts.models.xtts.XttsAudioConfig,
 
9
  __import__("TTS.config.shared_configs").config.shared_configs.BaseDatasetConfig
10
  ])
11
 
 
12
  import gradio as gr
13
  import torch
14
  import torchaudio
15
  from TTS.api import TTS
16
+ from pydub import AudioSegment
17
  import uuid
18
 
19
+ # Load XTTS model
 
 
 
 
 
20
  model_name = "tts_models/multilingual/multi-dataset/xtts_v2"
21
  tts = TTS(model_name=model_name, progress_bar=False, gpu=False)
22
 
23
+ # Map emotions to file paths
24
+ emotion_to_file = {
25
+ "Neutral": "samples/neutral.wav",
26
+ "Sad": "samples/sad.wav",
27
+ "Happy": "samples/happy.wav",
28
+ "Angry": "samples/angry.wav",
29
+ "Excited": "samples/excited.wav"
 
30
  }
31
 
32
+ # Voice generator
33
+ def generate_voice(text, emotion):
34
+ speaker_audio_path = emotion_to_file.get(emotion)
 
35
  if not os.path.isfile(speaker_audio_path):
36
  raise FileNotFoundError(f"Speaker audio file not found: {speaker_audio_path}")
37
 
38
+ # Generate unique filenames to avoid overwrites
39
+ uid = uuid.uuid4().hex
40
+ wav_path = f"output_{uid}.wav"
41
+ mp3_path = f"output_{uid}.mp3"
42
+
43
  tts.tts_to_file(
44
  text=text,
45
  speaker_wav=speaker_audio_path,
46
  language="en",
47
+ file_path=wav_path
48
  )
 
 
 
49
 
50
+ # Convert to MP3
51
+ sound = AudioSegment.from_wav(wav_path)
 
52
  sound.export(mp3_path, format="mp3")
53
 
54
  return mp3_path, mp3_path
55
 
56
+ # Gradio UI
57
  with gr.Blocks() as demo:
58
+ gr.Markdown("## 🎙️ AI Voiceover Generator with Emotion Control")
59
 
60
  with gr.Row():
61
  script_input = gr.Textbox(label="Enter Your Script", lines=5, placeholder="Type your video script here...")