Spaces:

VIDraft
/

Voice-Clone-Podcast

Runtime error

App Files Community

seawolf2357 committed on May 30

Commit

d41998a

verified ·

1 Parent(s): a9d13cd

Update app.py

Browse files

Files changed (1) hide show

app.py +61 -24

app.py CHANGED Viewed

@@ -53,8 +53,23 @@ except ImportError:
 try:
     from chatterbox.src.chatterbox.tts import ChatterboxTTS
     CHATTERBOX_AVAILABLE = True
 except ImportError:
-    CHATTERBOX_AVAILABLE = False
 # Import config and prompts
 from config_prompts import (
@@ -573,10 +588,14 @@ class UnifiedAudioConverter:
         Chatterbox TTS를 사용하여 대화를 음성으로 변환
         """
         if not CHATTERBOX_AVAILABLE:
-            raise RuntimeError("Chatterbox TTS not available")
-        # GPU 함수 내에서 모델 로드
-        model = ChatterboxTTS.from_pretrained(DEVICE)
         if seed_num_input != 0:
             set_seed(int(seed_num_input))
@@ -588,7 +607,7 @@ class UnifiedAudioConverter:
             if not text.strip():
                 continue
-            print(f"생성 중: Speaker {i+1} - '{text[:50]}...'")
             try:
                 # 텍스트가 짧으면 단일 생성
@@ -605,9 +624,11 @@ class UnifiedAudioConverter:
                 else:
                     # 긴 텍스트는 청크로 분할
                     chunks = split_text_into_chunks(text, max_chars=chunk_size_input)
                     chunk_audio_segments = []
-                    for chunk in chunks:
                         wav = model.generate(
                             chunk,
                             audio_prompt_path=audio_prompt_path_input,
@@ -633,11 +654,15 @@ class UnifiedAudioConverter:
                         audio_segments.append(concatenated_turn)
             except Exception as e:
-                print(f"Speaker {i+1} 생성 중 오류 발생: {e}")
                 continue
         if not audio_segments:
-            raise RuntimeError("오디오 생성에 실패했습니다.")
         # 모든 스피커의 오디오 세그먼트 연결
         speaker_silence_duration = int(0.5 * model.sr)  # 스피커 간 0.5초 무음
@@ -651,7 +676,7 @@ class UnifiedAudioConverter:
         concatenated_audio = np.concatenate(final_audio)
-        print(f"오디오 생성 완료. 총 길이: {len(concatenated_audio) / model.sr:.2f}초")
         return (model.sr, concatenated_audio)
     def _create_output_directory(self) -> str:
@@ -739,6 +764,9 @@ async def regenerate_audio(
     if not conversation_text.strip():
         return "Please provide conversation text.", None
     try:
         conversation_json = converter.parse_conversation_text(conversation_text)
@@ -746,25 +774,34 @@ async def regenerate_audio(
             return "No valid conversation found in the text.", None
         # Generate audio using Chatterbox TTS
-        sr, audio = converter.generate_tts_audio_gpu(
-            conversation_json,
-            ref_audio_path,
-            exaggeration,
-            temperature,
-            seed_num,
-            cfg_weight,
-            chunk_size
-        )
-        # Save audio to file
-        output_dir = converter._create_output_directory()
-        output_file = os.path.join(output_dir, "podcast_audio.wav")
-        sf.write(output_file, audio, sr)
-        return "Audio generated successfully!", output_file
     except Exception as e:
-        return f"Error generating audio: {str(e)}", None
 def synthesize_sync(article_input, input_type: str = "URL", mode: str = "Local"):

 try:
     from chatterbox.src.chatterbox.tts import ChatterboxTTS
     CHATTERBOX_AVAILABLE = True
+    print("✅ Chatterbox TTS imported successfully from chatterbox.src.chatterbox.tts")
 except ImportError:
+    try:
+        from chatterbox.tts import ChatterboxTTS
+        CHATTERBOX_AVAILABLE = True
+        print("✅ Chatterbox TTS imported successfully from chatterbox.tts")
+    except ImportError:
+        try:
+            # 다른 가능한 경로 시도
+            import sys
+            sys.path.append('/usr/local/lib/python3.10/site-packages')
+            from chatterbox import ChatterboxTTS
+            CHATTERBOX_AVAILABLE = True
+            print("✅ Chatterbox TTS imported successfully from chatterbox")
+        except ImportError:
+            CHATTERBOX_AVAILABLE = False
+            print("❌ Chatterbox TTS not available - falling back to text-only mode")
 # Import config and prompts
 from config_prompts import (
         Chatterbox TTS를 사용하여 대화를 음성으로 변환
         """
         if not CHATTERBOX_AVAILABLE:
+            raise RuntimeError("Chatterbox TTS not available. Please install chatterbox package.")
+        try:
+            # GPU 함수 내에서 모델 로드
+            model = ChatterboxTTS.from_pretrained(DEVICE)
+            print(f"✅ Chatterbox TTS model loaded on {DEVICE}")
+        except Exception as e:
+            raise RuntimeError(f"Failed to load Chatterbox TTS model: {e}")
         if seed_num_input != 0:
             set_seed(int(seed_num_input))
             if not text.strip():
                 continue
+            print(f"🎙️ 생성 중: Speaker {i+1} - '{text[:50]}...'")
             try:
                 # 텍스트가 짧으면 단일 생성
                 else:
                     # 긴 텍스트는 청크로 분할
                     chunks = split_text_into_chunks(text, max_chars=chunk_size_input)
+                    print(f"📝 텍스트를 {len(chunks)}개 청크로 분할")
                     chunk_audio_segments = []
+                    for j, chunk in enumerate(chunks):
+                        print(f"  📄 청크 {j+1}/{len(chunks)} 생성 중...")
                         wav = model.generate(
                             chunk,
                             audio_prompt_path=audio_prompt_path_input,
                         audio_segments.append(concatenated_turn)
             except Exception as e:
+                print(f"❌ Speaker {i+1} 생성 중 오류 발생: {e}")
+                # 오류 발생 시 무음으로 대체
+                silence_duration = int(2.0 * model.sr)  # 2초 무음
+                silence = np.zeros(silence_duration)
+                audio_segments.append(silence)
                 continue
         if not audio_segments:
+            raise RuntimeError("모든 오디오 생성에 실패했습니다.")
         # 모든 스피커의 오디오 세그먼트 연결
         speaker_silence_duration = int(0.5 * model.sr)  # 스피커 간 0.5초 무음
         concatenated_audio = np.concatenate(final_audio)
+        print(f"🎉 오디오 생성 완료! 총 길이: {len(concatenated_audio) / model.sr:.2f}초")
         return (model.sr, concatenated_audio)
     def _create_output_directory(self) -> str:
     if not conversation_text.strip():
         return "Please provide conversation text.", None
+    if not CHATTERBOX_AVAILABLE:
+        return "Chatterbox TTS not available. Please check the installation.", None
     try:
         conversation_json = converter.parse_conversation_text(conversation_text)
             return "No valid conversation found in the text.", None
         # Generate audio using Chatterbox TTS
+        try:
+            sr, audio = converter.generate_tts_audio_gpu(
+                conversation_json,
+                ref_audio_path,
+                exaggeration,
+                temperature,
+                seed_num,
+                cfg_weight,
+                chunk_size
+            )
+            # Save audio to file
+            output_dir = converter._create_output_directory()
+            output_file = os.path.join(output_dir, "podcast_audio.wav")
+            sf.write(output_file, audio, sr)
+            return "🎉 Audio generated successfully!", output_file
+        except Exception as e:
+            error_msg = str(e)
+            if "Chatterbox TTS not available" in error_msg:
+                return "❌ Chatterbox TTS is not properly installed. Please check the requirements.", None
+            elif "CUDA" in error_msg or "GPU" in error_msg:
+                return f"❌ GPU error: {error_msg}. Please try reducing chunk size or use CPU.", None
+            else:
+                return f"❌ Audio generation error: {error_msg}", None
     except Exception as e:
+        return f"❌ Error processing conversation: {str(e)}", None
 def synthesize_sync(article_input, input_type: str = "URL", mode: str = "Local"):