Spaces:

umarabbas890
/

AIVoiceoverGeneratorwithEmotionControl

Build error

App Files Files Community

umarabbas890 committed on Jun 17, 2025

Commit

3957ee2

verified ·

1 Parent(s): bd1ed7e

Update app.py

Browse files

Files changed (1) hide show

app.py +32 -28

app.py CHANGED Viewed

@@ -1,54 +1,58 @@
 import gradio as gr
 from TTS.api import TTS
-import uuid
 import os
 from pydub import AudioSegment
-# Load XTTS v2 model (slow on CPU, suitable for ~10s voiceovers)
-tts = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", progress_bar=False, gpu=False)
-# Define emotional styles mapping (XTTS uses style_wav or speaker embedding)
-EMOTIONS = {
-    "Neutral": None,
     "Sad": "sad",
     "Happy": "happy",
     "Angry": "angry",
     "Excited": "excited"
 }
-# Generate speech function
 def generate_voice(script, emotion):
-    # Output path as WAV first
-    wav_output_path = f"output_{uuid.uuid4().hex}.wav"
-    # XTTS v2 supports emotion as "style" param
     tts.tts_to_file(
         text=script,
-        file_path=wav_output_path,
-        speaker_wav=None,  # You can also add your own voice sample here
         language="en",
-        emotion=EMOTIONS[emotion]  # Optional style tag
     )
     # Convert to MP3 using pydub
-    mp3_output_path = wav_output_path.replace(".wav", ".mp3")
-    audio = AudioSegment.from_wav(wav_output_path)
-    audio.export(mp3_output_path, format="mp3")
-    return mp3_output_path, mp3_output_path
-# Gradio UI
 with gr.Blocks() as demo:
-    gr.Markdown("## 🎙️ AI Voiceover Generator with Emotions")
-    script_input = gr.Textbox(label="🎤 Script", placeholder="Enter your voiceover script...", lines=4, max_lines=4)
-    emotion_input = gr.Dropdown(label="🎭 Emotion", choices=list(EMOTIONS.keys()), value="Neutral")
-    generate_btn = gr.Button("Generate Voice")
-    audio_output = gr.Audio(label="🎧 Listen", type="filepath")
-    download_output = gr.File(label="⬇️ Download MP3")
-    generate_btn.click(fn=generate_voice, inputs=[script_input, emotion_input], outputs=[audio_output, download_output])
-if __name__ == "__main__":
-    demo.launch()

 import gradio as gr
 from TTS.api import TTS
 import os
+import uuid
 from pydub import AudioSegment
+# Load XTTS v2 model (correct model path)
+tts = TTS(model_name="tts_models/en/xtts_v2", progress_bar=False, gpu=False)
+# Emotions mapped to style embeddings
+emotion_styles = {
+    "Neutral": "neutral",
     "Sad": "sad",
     "Happy": "happy",
     "Angry": "angry",
     "Excited": "excited"
 }
+# Generate voice and save as MP3
 def generate_voice(script, emotion):
+    if not script.strip():
+        return "Please enter a script.", None
+    style = emotion_styles.get(emotion, "neutral")
+    # Generate speech (XTTS auto-selects English and supports style)
+    output_path = f"output_{uuid.uuid4().hex}.wav"
     tts.tts_to_file(
         text=script,
+        speaker_wav=None,
         language="en",
+        file_path=output_path,
+        style_wav=None,
+        style=style
     )
     # Convert to MP3 using pydub
+    mp3_path = output_path.replace(".wav", ".mp3")
+    sound = AudioSegment.from_wav(output_path)
+    sound.export(mp3_path, format="mp3")
+    return mp3_path, mp3_path
+# Gradio Interface
 with gr.Blocks() as demo:
+    gr.Markdown("## 🎙️ AI Voiceover Generator with Emotion Control\nConvert your script into a voiceover with the tone you choose!")
+    with gr.Row():
+        script_input = gr.Textbox(label="Enter Your Script", lines=5, placeholder="Type your video script here...")
+        emotion_choice = gr.Dropdown(["Neutral", "Sad", "Happy", "Angry", "Excited"], label="Select Emotion", value="Neutral")
+    generate_button = gr.Button("🎤 Generate Voiceover")
+    audio_output = gr.Audio(label="Listen", type="filepath")
+    download_link = gr.File(label="Download MP3")
+    generate_button.click(fn=generate_voice, inputs=[script_input, emotion_choice], outputs=[audio_output, download_link])
+demo.launch()