seawolf2357 committed on
Commit
d41998a
·
verified ·
1 Parent(s): a9d13cd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +61 -24
app.py CHANGED
@@ -53,8 +53,23 @@ except ImportError:
53
  try:
54
  from chatterbox.src.chatterbox.tts import ChatterboxTTS
55
  CHATTERBOX_AVAILABLE = True
 
56
  except ImportError:
57
- CHATTERBOX_AVAILABLE = False
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
 
59
  # Import config and prompts
60
  from config_prompts import (
@@ -573,10 +588,14 @@ class UnifiedAudioConverter:
573
  Chatterbox TTS๋ฅผ ์‚ฌ์šฉํ•˜์—ฌ ๋Œ€ํ™”๋ฅผ ์Œ์„ฑ์œผ๋กœ ๋ณ€ํ™˜
574
  """
575
  if not CHATTERBOX_AVAILABLE:
576
- raise RuntimeError("Chatterbox TTS not available")
577
 
578
- # GPU 함수 내에서 모델 로드
579
- model = ChatterboxTTS.from_pretrained(DEVICE)
 
 
 
 
580
 
581
  if seed_num_input != 0:
582
  set_seed(int(seed_num_input))
@@ -588,7 +607,7 @@ class UnifiedAudioConverter:
588
  if not text.strip():
589
  continue
590
 
591
- print(f"์ƒ์„ฑ ์ค‘: Speaker {i+1} - '{text[:50]}...'")
592
 
593
  try:
594
  # 텍스트가 짧으면 단일 생성
@@ -605,9 +624,11 @@ class UnifiedAudioConverter:
605
  else:
606
  # 긴 텍스트는 청크로 분할
607
  chunks = split_text_into_chunks(text, max_chars=chunk_size_input)
 
608
 
609
  chunk_audio_segments = []
610
- for chunk in chunks:
 
611
  wav = model.generate(
612
  chunk,
613
  audio_prompt_path=audio_prompt_path_input,
@@ -633,11 +654,15 @@ class UnifiedAudioConverter:
633
  audio_segments.append(concatenated_turn)
634
 
635
  except Exception as e:
636
- print(f"Speaker {i+1} ์ƒ์„ฑ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {e}")
 
 
 
 
637
  continue
638
 
639
  if not audio_segments:
640
- raise RuntimeError("์˜ค๋””์˜ค ์ƒ์„ฑ์— ์‹คํŒจํ–ˆ์Šต๋‹ˆ๋‹ค.")
641
 
642
  # 모든 스피커의 오디오 세그먼트 연결
643
  speaker_silence_duration = int(0.5 * model.sr) # ์Šคํ”ผ์ปค ๊ฐ„ 0.5์ดˆ ๋ฌด์Œ
@@ -651,7 +676,7 @@ class UnifiedAudioConverter:
651
 
652
  concatenated_audio = np.concatenate(final_audio)
653
 
654
- print(f"์˜ค๋””์˜ค ์ƒ์„ฑ ์™„๋ฃŒ. ์ด ๊ธธ์ด: {len(concatenated_audio) / model.sr:.2f}์ดˆ")
655
  return (model.sr, concatenated_audio)
656
 
657
  def _create_output_directory(self) -> str:
@@ -739,6 +764,9 @@ async def regenerate_audio(
739
  if not conversation_text.strip():
740
  return "Please provide conversation text.", None
741
 
 
 
 
742
  try:
743
  conversation_json = converter.parse_conversation_text(conversation_text)
744
 
@@ -746,25 +774,34 @@ async def regenerate_audio(
746
  return "No valid conversation found in the text.", None
747
 
748
  # Generate audio using Chatterbox TTS
749
- sr, audio = converter.generate_tts_audio_gpu(
750
- conversation_json,
751
- ref_audio_path,
752
- exaggeration,
753
- temperature,
754
- seed_num,
755
- cfg_weight,
756
- chunk_size
757
- )
 
758
 
759
- # Save audio to file
760
- output_dir = converter._create_output_directory()
761
- output_file = os.path.join(output_dir, "podcast_audio.wav")
762
- sf.write(output_file, audio, sr)
763
 
764
- return "Audio generated successfully!", output_file
 
 
 
 
 
 
 
 
765
 
766
  except Exception as e:
767
- return f"Error generating audio: {str(e)}", None
768
 
769
 
770
  def synthesize_sync(article_input, input_type: str = "URL", mode: str = "Local"):
 
53
  try:
54
  from chatterbox.src.chatterbox.tts import ChatterboxTTS
55
  CHATTERBOX_AVAILABLE = True
56
+ print("โœ… Chatterbox TTS imported successfully from chatterbox.src.chatterbox.tts")
57
  except ImportError:
58
+ try:
59
+ from chatterbox.tts import ChatterboxTTS
60
+ CHATTERBOX_AVAILABLE = True
61
+ print("โœ… Chatterbox TTS imported successfully from chatterbox.tts")
62
+ except ImportError:
63
+ try:
64
+ # 다른 가능한 경로 시도
65
+ import sys
66
+ sys.path.append('/usr/local/lib/python3.10/site-packages')
67
+ from chatterbox import ChatterboxTTS
68
+ CHATTERBOX_AVAILABLE = True
69
+ print("โœ… Chatterbox TTS imported successfully from chatterbox")
70
+ except ImportError:
71
+ CHATTERBOX_AVAILABLE = False
72
+ print("โŒ Chatterbox TTS not available - falling back to text-only mode")
73
 
74
  # Import config and prompts
75
  from config_prompts import (
 
588
  Chatterbox TTS๋ฅผ ์‚ฌ์šฉํ•˜์—ฌ ๋Œ€ํ™”๋ฅผ ์Œ์„ฑ์œผ๋กœ ๋ณ€ํ™˜
589
  """
590
  if not CHATTERBOX_AVAILABLE:
591
+ raise RuntimeError("Chatterbox TTS not available. Please install chatterbox package.")
592
 
593
+ try:
594
+ # GPU 함수 내에서 모델 로드
595
+ model = ChatterboxTTS.from_pretrained(DEVICE)
596
+ print(f"โœ… Chatterbox TTS model loaded on {DEVICE}")
597
+ except Exception as e:
598
+ raise RuntimeError(f"Failed to load Chatterbox TTS model: {e}")
599
 
600
  if seed_num_input != 0:
601
  set_seed(int(seed_num_input))
 
607
  if not text.strip():
608
  continue
609
 
610
+ print(f"๐ŸŽ™๏ธ ์ƒ์„ฑ ์ค‘: Speaker {i+1} - '{text[:50]}...'")
611
 
612
  try:
613
  # 텍스트가 짧으면 단일 생성
 
624
  else:
625
  # 긴 텍스트는 청크로 분할
626
  chunks = split_text_into_chunks(text, max_chars=chunk_size_input)
627
+ print(f"๐Ÿ“ ํ…์ŠคํŠธ๋ฅผ {len(chunks)}๊ฐœ ์ฒญํฌ๋กœ ๋ถ„ํ• ")
628
 
629
  chunk_audio_segments = []
630
+ for j, chunk in enumerate(chunks):
631
+ print(f" ๐Ÿ“„ ์ฒญํฌ {j+1}/{len(chunks)} ์ƒ์„ฑ ์ค‘...")
632
  wav = model.generate(
633
  chunk,
634
  audio_prompt_path=audio_prompt_path_input,
 
654
  audio_segments.append(concatenated_turn)
655
 
656
  except Exception as e:
657
+ print(f"โŒ Speaker {i+1} ์ƒ์„ฑ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {e}")
658
+ # 오류 발생 시 무음으로 대체
659
+ silence_duration = int(2.0 * model.sr) # 2์ดˆ ๋ฌด์Œ
660
+ silence = np.zeros(silence_duration)
661
+ audio_segments.append(silence)
662
  continue
663
 
664
  if not audio_segments:
665
+ raise RuntimeError("๋ชจ๋“  ์˜ค๋””์˜ค ์ƒ์„ฑ์— ์‹คํŒจํ–ˆ์Šต๋‹ˆ๋‹ค.")
666
 
667
  # 모든 스피커의 오디오 세그먼트 연결
668
  speaker_silence_duration = int(0.5 * model.sr) # ์Šคํ”ผ์ปค ๊ฐ„ 0.5์ดˆ ๋ฌด์Œ
 
676
 
677
  concatenated_audio = np.concatenate(final_audio)
678
 
679
+ print(f"๐ŸŽ‰ ์˜ค๋””์˜ค ์ƒ์„ฑ ์™„๋ฃŒ! ์ด ๊ธธ์ด: {len(concatenated_audio) / model.sr:.2f}์ดˆ")
680
  return (model.sr, concatenated_audio)
681
 
682
  def _create_output_directory(self) -> str:
 
764
  if not conversation_text.strip():
765
  return "Please provide conversation text.", None
766
 
767
+ if not CHATTERBOX_AVAILABLE:
768
+ return "Chatterbox TTS not available. Please check the installation.", None
769
+
770
  try:
771
  conversation_json = converter.parse_conversation_text(conversation_text)
772
 
 
774
  return "No valid conversation found in the text.", None
775
 
776
  # Generate audio using Chatterbox TTS
777
+ try:
778
+ sr, audio = converter.generate_tts_audio_gpu(
779
+ conversation_json,
780
+ ref_audio_path,
781
+ exaggeration,
782
+ temperature,
783
+ seed_num,
784
+ cfg_weight,
785
+ chunk_size
786
+ )
787
 
788
+ # Save audio to file
789
+ output_dir = converter._create_output_directory()
790
+ output_file = os.path.join(output_dir, "podcast_audio.wav")
791
+ sf.write(output_file, audio, sr)
792
 
793
+ return "๐ŸŽ‰ Audio generated successfully!", output_file
794
+ except Exception as e:
795
+ error_msg = str(e)
796
+ if "Chatterbox TTS not available" in error_msg:
797
+ return "โŒ Chatterbox TTS is not properly installed. Please check the requirements.", None
798
+ elif "CUDA" in error_msg or "GPU" in error_msg:
799
+ return f"โŒ GPU error: {error_msg}. Please try reducing chunk size or use CPU.", None
800
+ else:
801
+ return f"โŒ Audio generation error: {error_msg}", None
802
 
803
  except Exception as e:
804
+ return f"โŒ Error processing conversation: {str(e)}", None
805
 
806
 
807
  def synthesize_sync(article_input, input_type: str = "URL", mode: str = "Local"):