Spaces:

FINAL-Bench
/

MoneyPrinterV2

Running

App Files Files Community

SeaWolf-AI committed on Apr 5

Commit

1b20d8b

verified ·

1 Parent(s): 400d937

Fix build: replace KittenTTS with edge-tts, pin Python 3.12, add packages.txt for imagemagick

Browse files

Files changed (4) hide show

README.md +2 -1
packages.txt +2 -0
requirements.txt +3 -2
src/classes/Tts.py +71 -7

README.md CHANGED Viewed

@@ -5,6 +5,7 @@ colorFrom: blue
 colorTo: purple
 sdk: gradio
 sdk_version: "4.44.1"
 app_file: app.py
 pinned: false
 ---
@@ -16,7 +17,7 @@ AI-powered YouTube Shorts video generation pipeline:
 1. **Topic Generation** — LLM generates a video topic from your niche
 2. **Script Writing** — LLM writes a short video script
 3. **Image Generation** — Gemini API creates visuals from AI prompts
-4. **Text-to-Speech** — KittenTTS converts script to natural speech
 5. **Subtitle Generation** — faster-whisper creates synced subtitles
 6. **Video Assembly** — MoviePy combines everything into a downloadable MP4

 colorTo: purple
 sdk: gradio
 sdk_version: "4.44.1"
+python_version: "3.12"
 app_file: app.py
 pinned: false
 ---
 1. **Topic Generation** — LLM generates a video topic from your niche
 2. **Script Writing** — LLM writes a short video script
 3. **Image Generation** — Gemini API creates visuals from AI prompts
+4. **Text-to-Speech** — Edge TTS converts script to natural speech
 5. **Subtitle Generation** — faster-whisper creates synced subtitles
 6. **Video Assembly** — MoviePy combines everything into a downloadable MP4

packages.txt ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ imagemagick
2	+ ffmpeg

requirements.txt CHANGED Viewed

@@ -2,9 +2,10 @@ gradio>=4.0
 huggingface_hub>=0.20.0
 termcolor
 requests
-kittentts @ https://github.com/KittenML/KittenTTS/releases/download/0.8.1/kittentts-0.8.1-py3-none-any.whl
 soundfile
-moviepy
 Pillow>=10.0.0
 faster-whisper
 srt_equalizer

 huggingface_hub>=0.20.0
 termcolor
 requests
+edge-tts
+pydub
 soundfile
+moviepy<2
 Pillow>=10.0.0
 faster-whisper
 srt_equalizer

src/classes/Tts.py CHANGED Viewed

@@ -1,18 +1,82 @@
 import os
-import soundfile as sf
-from kittentts import KittenTTS as KittenModel
-from config import ROOT_DIR, get_tts_voice
-KITTEN_MODEL = "KittenML/kitten-tts-mini-0.8"
-KITTEN_SAMPLE_RATE = 24000
 class TTS:
     def __init__(self) -> None:
-        self._model = KittenModel(KITTEN_MODEL)
         self._voice = get_tts_voice()
     def synthesize(self, text, output_file=os.path.join(ROOT_DIR, ".mp", "audio.wav")):
         audio = self._model.generate(text, voice=self._voice)
-        sf.write(output_file, audio, KITTEN_SAMPLE_RATE)
         return output_file

 import os
+from config import ROOT_DIR, get_tts_voice, is_running_in_spaces
+# Voice mapping: friendly name -> edge-tts voice ID
+EDGE_TTS_VOICES = {
+    "Jasper": "en-US-GuyNeural",
+    "Bella": "en-US-JennyNeural",
+    "Luna": "en-GB-SoniaNeural",
+    "Bruno": "en-US-ChristopherNeural",
+    "Rosie": "en-AU-NatashaNeural",
+    "Hugo": "en-GB-RyanNeural",
+    "Kiki": "en-US-AriaNeural",
+    "Leo": "en-US-DavisNeural",
+}
def _use_edge_tts() -> bool:
    """Report whether speech should be synthesized with edge-tts.

    Returns:
        True when ``is_running_in_spaces()`` is true, or when the
        ``kittentts`` package cannot be imported. False only when
        KittenTTS is importable and the app is not running in Spaces.
    """
    if is_running_in_spaces():
        return True

    # Probe for the optional KittenTTS dependency; only the import matters.
    try:
        from kittentts import KittenTTS  # noqa: F401
    except ImportError:
        kitten_available = False
    else:
        kitten_available = True

    return not kitten_available
 class TTS:
     def __init__(self) -> None:
         self._voice = get_tts_voice()
+        self._use_edge = _use_edge_tts()
+        if not self._use_edge:
+            import soundfile  # noqa: F401 — ensure available
+            from kittentts import KittenTTS as KittenModel
+            self._model = KittenModel("KittenML/kitten-tts-mini-0.8")
+            self._sample_rate = 24000
+        else:
+            self._model = None
     def synthesize(self, text, output_file=os.path.join(ROOT_DIR, ".mp", "audio.wav")):
+        if self._use_edge:
+            return self._synthesize_edge(text, output_file)
+        return self._synthesize_kitten(text, output_file)
+    def _synthesize_kitten(self, text, output_file):
+        import soundfile as sf
         audio = self._model.generate(text, voice=self._voice)
+        sf.write(output_file, audio, self._sample_rate)
+        return output_file
+    def _synthesize_edge(self, text, output_file):
+        import asyncio
+        import edge_tts
+        voice_id = EDGE_TTS_VOICES.get(self._voice, "en-US-GuyNeural")
+        # edge-tts outputs mp3; we write to mp3 then keep as-is
+        # MoviePy can handle mp3 audio via ffmpeg
+        mp3_path = output_file.rsplit(".", 1)[0] + ".mp3"
+        async def _generate():
+            communicate = edge_tts.Communicate(text, voice_id)
+            await communicate.save(mp3_path)
+        asyncio.run(_generate())
+        # Convert mp3 to wav for compatibility with the rest of the pipeline
+        try:
+            from pydub import AudioSegment
+            audio = AudioSegment.from_mp3(mp3_path)
+            audio.export(output_file, format="wav")
+            os.remove(mp3_path)
+        except ImportError:
+            # If pydub not available, just use the mp3 directly
+            # Rename mp3 to the expected output path
+            if os.path.exists(output_file):
+                os.remove(output_file)
+            os.rename(mp3_path, output_file)
         return output_file