Spaces:

PlotweaverModel
/

AudioBook

Running

App Files Community

PlotweaverModel committed 9 days ago

Commit

bfd700b

verified ·

1 Parent(s): 3a7a3f7

Upload app.py

Browse files

Files changed (1) hide show

app.py +42 -36

app.py CHANGED Viewed

@@ -1,14 +1,11 @@
 """
 Audiobook Generator - English Source to Multi-Language Audio
-Powered by:
-  - Qwen3.5-Omni-Plus (preset voices, 36 languages)
-  - Qwen3-TTS-VC (voice cloning, 10 languages)
-  - YourVoic API (1000+ emotional voices, 93+ languages including Arabic, Swahili, Indian languages)
 Deploy as a Hugging Face Space:
   1. Create a new Space (SDK: Gradio)
   2. Upload app.py and requirements.txt
-  3. Add secrets: DASHSCOPE_API_KEY (required), YOURVOIC_API_KEY (optional)
 """
 import os
@@ -315,11 +312,11 @@ def generate_speech_yourvoic_with_retry(client, text, voice, yv_model, emotion,
     return None, text, f"No valid voice found for {language}. This language may not be supported on your plan. Tried: {candidates[:8]}"
 YOURVOIC_MODELS = [
-    "aura-prime -- Balanced quality and speed (recommended)",
-    "aura-lite -- Fast, good for previews",
-    "aura-max -- Premium quality (paid plans only)",
-    "rapid-max -- Fast with good quality",
-    "rapid-flash -- Fastest, real-time apps",
 ]
 YOURVOIC_EMOTIONS = [
@@ -333,7 +330,16 @@ def get_voice_name(label):
 def get_yourvoic_model(label):
-    return label.split("--")[0].strip()
 # ==========================================
@@ -447,7 +453,7 @@ def split_text_into_chunks(text, max_chars=MAX_CHARS_PER_CHUNK):
 # ==========================================
-# VOICE CLONING (Qwen)
 # ==========================================
 def prepare_clone_audio(audio_path):
     result = subprocess.run(
@@ -491,7 +497,7 @@ def clone_voice(audio_path, api_key):
 # ==========================================
-# TRANSLATION (Qwen text-only)
 # ==========================================
 def translate_text(client, text, target_language, lang_config):
     response = client.chat.completions.create(
@@ -505,7 +511,7 @@ def translate_text(client, text, target_language, lang_config):
 # ==========================================
-# TTS MODE 1: PRESET VOICE (Qwen Omni)
 # ==========================================
 def generate_speech_preset(client, text, voice, language, lang_config, translate, chunk_index, output_dir):
     output_wav = os.path.join(output_dir, f"chunk_{chunk_index:04d}.wav")
@@ -545,7 +551,7 @@ def generate_speech_preset(client, text, voice, language, lang_config, translate
 # ==========================================
-# TTS MODE 2: CLONED VOICE (Qwen TTS-VC)
 # ==========================================
 def generate_speech_cloned(client, text, voice_id, language, lang_config, translate, api_key, chunk_index, output_dir):
     output_wav = os.path.join(output_dir, f"vc_chunk_{chunk_index:04d}.wav")
@@ -573,11 +579,11 @@ def generate_speech_cloned(client, text, voice_id, language, lang_config, transl
 # ==========================================
-# TTS MODE 3: YOURVOIC (emotional voices, 93+ languages)
 # ==========================================
 def generate_speech_yourvoic(client, text, voice, yv_model, emotion, language, lang_config, translate,
                              api_key, chunk_index, output_dir):
-    """Generate speech using YourVoic API. Handles translation via Qwen then TTS via YourVoic."""
     output_file = os.path.join(output_dir, f"yv_chunk_{chunk_index:04d}.mp3")
     # Translate if needed
@@ -675,7 +681,7 @@ def generate_audiobook(text_input, file_input, target_language, voice_mode,
     lang_config = LANGUAGES[target_language]
     lang_engine = lang_config["engine"]
     use_clone = voice_mode == "Clone a Voice"
-    use_yourvoic = voice_mode == "YourVoic (Emotional AI)"
     translate = target_language != "English"
     # Auto-correct engine if language requires it
@@ -691,12 +697,12 @@ def generate_audiobook(text_input, file_input, target_language, voice_mode,
     # Validate keys
     if use_yourvoic:
         if not yv_key:
-            raise gr.Error("YOURVOIC_API_KEY not set. Add it in Settings > Secrets. Get one at yourvoic.com/api/user")
         if translate and not ds_key:
-            raise gr.Error("DASHSCOPE_API_KEY needed for translation. Add it in Settings > Secrets.")
     else:
         if not ds_key:
-            raise gr.Error("DASHSCOPE_API_KEY not set. Add it in Settings > Secrets.")
     client = OpenAI(api_key=ds_key, base_url=DASHSCOPE_BASE_URL) if ds_key else None
     tmp_dir = tempfile.mkdtemp(prefix="audiobook_")
@@ -782,14 +788,14 @@ def generate_audiobook(text_input, file_input, target_language, voice_mode,
         audio_size = os.path.getsize(final_mp3) / (1024 * 1024)
         if use_yourvoic:
-            voice_info = f"YourVoic: {yourvoic_voice_label} ({yourvoic_emotion})"
-            mode_info = f"YourVoic API ({yourvoic_model_label})"
         elif use_clone:
             voice_info = f"Cloned (ID: {cloned_voice_id[:20]}...)"
-            mode_info = "Qwen3-TTS-VC"
         else:
             voice_info = preset_voice_label
-            mode_info = "Qwen3.5-Omni-Plus"
         stats = (
             f"**Audiobook Generated!**\n\n"
@@ -828,7 +834,7 @@ And he would smile - that slow, careful smile that seemed to cost him something
 DESCRIPTION = """
 # Audiobook Generator
-### English Text to Multi-Language Audiobook
 """
@@ -899,7 +905,7 @@ def on_language_change(lang_choice):
             gr.update(visible=True, choices=voice_choices, value=default_voice),  # yv_voice
             gr.update(visible=True),   # yv_model
             gr.update(visible=True),   # yv_emotion
-            gr.update(value=f"Engine: YourVoic (1000+ emotional voices)"),  # engine_label
             gr.update(visible=False, value=False),  # use_clone
             gr.update(visible=False),  # clone_audio
             gr.update(visible=False),  # clone_info
@@ -910,7 +916,7 @@ def on_language_change(lang_choice):
             gr.update(visible=False),  # yv_voice
             gr.update(visible=False),  # yv_model
             gr.update(visible=False),  # yv_emotion
-            gr.update(value=f"Engine: Qwen3.5-Omni-Plus (translate + speak)"),  # engine_label
             gr.update(visible=True),   # use_clone
             gr.update(visible=False),  # clone_audio
             gr.update(visible=False),  # clone_info
@@ -934,7 +940,7 @@ def generate_wrapper(text_input, file_input, language_choice, use_clone,
     if use_clone:
         voice_mode = "Clone a Voice"
     elif engine == "yourvoic":
-        voice_mode = "YourVoic (Emotional AI)"
     else:
         voice_mode = "Preset Voice"
@@ -960,7 +966,7 @@ with gr.Blocks(title="Audiobook Generator") as demo:
             target_lang = gr.Dropdown(choices=lang_choices, value="English", label="Target Language",
                                       info="The right voice engine is selected automatically based on language.")
-            engine_label = gr.Markdown(value="Engine: Qwen3.5-Omni-Plus (translate + speak)")
             # Qwen preset voice (visible for Qwen languages)
             preset_voice = gr.Dropdown(choices=PRESET_VOICES, value="Jennifer -- Cinematic narrator",
@@ -968,16 +974,16 @@ with gr.Blocks(title="Audiobook Generator") as demo:
             # YourVoic controls (visible for YourVoic languages)
             yv_voice = gr.Dropdown(choices=YOURVOIC_VOICES_DEFAULT, value="Peter -- Universal fallback",
-                                   label="YourVoic Voice", visible=False, allow_custom_value=True,
-                                   info="Voices update automatically per language. Peter works for all.")
-            yv_model = gr.Dropdown(choices=YOURVOIC_MODELS, value="aura-prime -- Balanced quality and speed (recommended)",
-                                   label="YourVoic Model", visible=False)
             yv_emotion = gr.Dropdown(choices=YOURVOIC_EMOTIONS, value="friendly",
                                      label="Emotion Style", visible=False,
                                      info="Add emotional expression to the narration")
             # Voice cloning toggle (optional, works for Qwen languages only)
-            use_clone = gr.Checkbox(value=False, label="Use Voice Cloning (Qwen, 10 languages only)",
                                     info="Clone a voice from audio sample instead of using preset")
             clone_audio = gr.Audio(label="Voice Sample (10s-3min)", type="filepath", visible=False)
             clone_info = gr.Markdown(
@@ -1015,7 +1021,7 @@ with gr.Blocks(title="Audiobook Generator") as demo:
     gr.Markdown(
         "---\n"
-        )
 if __name__ == "__main__":
     demo.launch()

 """
 Audiobook Generator - English Source to Multi-Language Audio
+Supports 51 languages with preset voices, voice cloning, and emotional AI voices.
 Deploy as a Hugging Face Space:
   1. Create a new Space (SDK: Gradio)
   2. Upload app.py and requirements.txt
+  3. Add required API secrets in Settings
 """
 import os
     return None, text, f"No valid voice found for {language}. This language may not be supported on your plan. Tried: {candidates[:8]}"
 YOURVOIC_MODELS = [
+    "balanced -- Balanced quality and speed (recommended)",
+    "lite -- Fast, good for previews",
+    "premium -- Premium quality (paid plans only)",
+    "fast -- Fast with good quality",
+    "realtime -- Fastest, real-time apps",
 ]
 YOURVOIC_EMOTIONS = [
 def get_yourvoic_model(label):
+    """Map anonymous model label to actual API model name."""
+    name = label.split("--")[0].strip()
+    model_map = {
+        "balanced": "aura-prime",
+        "lite": "aura-lite",
+        "premium": "aura-max",
+        "fast": "rapid-max",
+        "realtime": "rapid-flash",
+    }
+    return model_map.get(name, "aura-prime")
 # ==========================================
 # ==========================================
+# VOICE CLONING
 # ==========================================
 def prepare_clone_audio(audio_path):
     result = subprocess.run(
 # ==========================================
+# TRANSLATION
 # ==========================================
 def translate_text(client, text, target_language, lang_config):
     response = client.chat.completions.create(
 # ==========================================
+# TTS MODE 1: PRESET VOICE
 # ==========================================
 def generate_speech_preset(client, text, voice, language, lang_config, translate, chunk_index, output_dir):
     output_wav = os.path.join(output_dir, f"chunk_{chunk_index:04d}.wav")
 # ==========================================
+# TTS MODE 2: CLONED VOICE
 # ==========================================
 def generate_speech_cloned(client, text, voice_id, language, lang_config, translate, api_key, chunk_index, output_dir):
     output_wav = os.path.join(output_dir, f"vc_chunk_{chunk_index:04d}.wav")
 # ==========================================
+# TTS MODE 3: EMOTIONAL AI VOICES
 # ==========================================
 def generate_speech_yourvoic(client, text, voice, yv_model, emotion, language, lang_config, translate,
                              api_key, chunk_index, output_dir):
+    """Generate speech using emotional AI voice API."""
     output_file = os.path.join(output_dir, f"yv_chunk_{chunk_index:04d}.mp3")
     # Translate if needed
     lang_config = LANGUAGES[target_language]
     lang_engine = lang_config["engine"]
     use_clone = voice_mode == "Clone a Voice"
+    use_yourvoic = voice_mode == "Emotional AI"
     translate = target_language != "English"
     # Auto-correct engine if language requires it
     # Validate keys
     if use_yourvoic:
         if not yv_key:
+            raise gr.Error("Voice API key for emotional voices not set. Add YOURVOIC_API_KEY in Settings > Secrets.")
         if translate and not ds_key:
+            raise gr.Error("Translation API key not set. Add DASHSCOPE_API_KEY in Settings > Secrets.")
     else:
         if not ds_key:
+            raise gr.Error("Voice API key not set. Add DASHSCOPE_API_KEY in Settings > Secrets.")
     client = OpenAI(api_key=ds_key, base_url=DASHSCOPE_BASE_URL) if ds_key else None
     tmp_dir = tempfile.mkdtemp(prefix="audiobook_")
         audio_size = os.path.getsize(final_mp3) / (1024 * 1024)
         if use_yourvoic:
+            voice_info = f"Emotional AI: {yourvoic_voice_label} ({yourvoic_emotion})"
+            mode_info = f"Emotional AI Engine"
         elif use_clone:
             voice_info = f"Cloned (ID: {cloned_voice_id[:20]}...)"
+            mode_info = "Voice Clone Engine"
         else:
             voice_info = preset_voice_label
+            mode_info = "Premium AI Engine"
         stats = (
             f"**Audiobook Generated!**\n\n"
 DESCRIPTION = """
 # Audiobook Generator
+### English Text to Multi-Language Audiobook (51 Languages)
 """
             gr.update(visible=True, choices=voice_choices, value=default_voice),  # yv_voice
             gr.update(visible=True),   # yv_model
             gr.update(visible=True),   # yv_emotion
+            gr.update(value=f"Engine: Emotional AI Voices"),  # engine_label
             gr.update(visible=False, value=False),  # use_clone
             gr.update(visible=False),  # clone_audio
             gr.update(visible=False),  # clone_info
             gr.update(visible=False),  # yv_voice
             gr.update(visible=False),  # yv_model
             gr.update(visible=False),  # yv_emotion
+            gr.update(value=f"Engine: Premium AI Voices"),  # engine_label
             gr.update(visible=True),   # use_clone
             gr.update(visible=False),  # clone_audio
             gr.update(visible=False),  # clone_info
     if use_clone:
         voice_mode = "Clone a Voice"
     elif engine == "yourvoic":
+        voice_mode = "Emotional AI"
     else:
         voice_mode = "Preset Voice"
             target_lang = gr.Dropdown(choices=lang_choices, value="English", label="Target Language",
                                       info="The right voice engine is selected automatically based on language.")
+            engine_label = gr.Markdown(value="Engine: Premium AI Voices")
             # Qwen preset voice (visible for Qwen languages)
             preset_voice = gr.Dropdown(choices=PRESET_VOICES, value="Jennifer -- Cinematic narrator",
             # YourVoic controls (visible for YourVoic languages)
             yv_voice = gr.Dropdown(choices=YOURVOIC_VOICES_DEFAULT, value="Peter -- Universal fallback",
+                                   label="Voice", visible=False, allow_custom_value=True,
+                                   info="Voices update automatically per language.")
+            yv_model = gr.Dropdown(choices=YOURVOIC_MODELS, value="balanced -- Balanced quality and speed (recommended)",
+                                   label="AI Model", visible=False)
             yv_emotion = gr.Dropdown(choices=YOURVOIC_EMOTIONS, value="friendly",
                                      label="Emotion Style", visible=False,
                                      info="Add emotional expression to the narration")
             # Voice cloning toggle (optional, works for Qwen languages only)
+            use_clone = gr.Checkbox(value=False, label="Use Voice Cloning (10 core languages only)",
                                     info="Clone a voice from audio sample instead of using preset")
             clone_audio = gr.Audio(label="Voice Sample (10s-3min)", type="filepath", visible=False)
             clone_info = gr.Markdown(
     gr.Markdown(
         "---\n"
+    )
 if __name__ == "__main__":
     demo.launch()