Spaces:

VIDraft
/

Voice-Clone-Podcast

Runtime error

App Files Files Community

openfree committed on May 30

Commit

4c79f75

verified ·

1 Parent(s): df21e91

Delete app-backup.py

Browse files

Files changed (1) hide show

app-backup.py +0 -324

app-backup.py DELETED Viewed

@@ -1,324 +0,0 @@
-import random
-import numpy as np
-import torch
-from chatterbox.src.chatterbox.tts import ChatterboxTTS
-import gradio as gr
-import spaces
-import re
# Pick the compute device once at import time: CUDA when PyTorch reports a
# usable GPU, otherwise the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"
print(f"🚀 Running on device: {DEVICE}")
# --- Global Model Initialization ---
# Process-wide cache for the TTS model; filled lazily by get_or_load_model().
MODEL = None
def get_or_load_model():
    """Return the shared ChatterboxTTS model, loading it on first use.

    The model is created with ``ChatterboxTTS.from_pretrained(DEVICE)`` and,
    when it exposes a ``to`` method and does not already report ``DEVICE`` as
    its device, moved there.

    Returns:
        The cached model instance.

    Raises:
        Exception: Any error raised while loading or moving the model is
            logged and re-raised. The cache is left empty in that case, so a
            later call retries instead of handing out a half-configured model.
    """
    global MODEL
    if MODEL is not None:
        return MODEL

    print("Model not loaded, initializing...")
    try:
        model = ChatterboxTTS.from_pretrained(DEVICE)
        # `device` is treated as optional (the log line below already does),
        # so read it defensively instead of risking an AttributeError.
        reported_device = str(getattr(model, 'device', None))
        if hasattr(model, 'to') and reported_device != DEVICE:
            model.to(DEVICE)
        print(f"Model loaded successfully. Internal device: {getattr(model, 'device', 'N/A')}")
    except Exception as e:
        print(f"Error loading model: {e}")
        raise
    # Publish to the global cache only after every setup step succeeded.
    MODEL = model
    return MODEL
# Try to load the model eagerly while the module is being imported. A failure
# is logged and swallowed on purpose so the rest of the module still loads.
try:
    get_or_load_model()
except Exception as startup_error:
    print(f"CRITICAL: Failed to load model on startup. Application may not function. Error: {startup_error}")
def set_seed(seed: int):
    """Seed Python's ``random``, NumPy and PyTorch so runs are reproducible.

    Args:
        seed: Seed value. Negative values and values of 2**32 or more are
            accepted; NumPy receives the seed reduced modulo 2**32 because
            its legacy seeding API rejects anything outside that range.
    """
    seed = int(seed)
    torch.manual_seed(seed)
    # Documented as silently ignored when CUDA is unavailable, so no device
    # check is needed; it also covers every GPU, not just the current one.
    torch.cuda.manual_seed_all(seed)
    random.seed(seed)
    # np.random.seed raises ValueError unless 0 <= seed < 2**32.
    np.random.seed(seed % (2 ** 32))
def split_text_into_chunks(text: str, max_chars: int = 250) -> list[str]:
    """
    Split text into chunks that are each at most ``max_chars`` characters long.

    The text is first cut into sentences (after ``.``, ``!`` or ``?`` followed
    by whitespace). Consecutive sentences are packed into one chunk, joined by
    a single space, while they fit. A sentence longer than the limit starts a
    fresh chunk and is packed word by word; a single word longer than the
    limit (for example text written without spaces) is sliced into
    ``max_chars``-sized pieces so the limit is never exceeded.

    Args:
        text: Input text. Leading and trailing whitespace is ignored.
        max_chars: Maximum chunk length in characters; must be at least 1.

    Returns:
        The chunks in reading order. Empty or whitespace-only text yields ``[]``.

    Raises:
        ValueError: If ``max_chars`` is smaller than 1.
    """
    limit = int(max_chars)
    if limit < 1:
        raise ValueError("max_chars must be at least 1")

    def fits(buffer: str, piece: str) -> bool:
        # The joining space only costs a character when the buffer is non-empty.
        needed = len(buffer) + 1 + len(piece) if buffer else len(piece)
        return needed <= limit

    chunks: list[str] = []
    current = ""
    # Basic sentence split: break on whitespace that follows ., ! or ?
    for sentence in re.split(r'(?<=[.!?])\s+', text.strip()):
        if not sentence:
            continue
        if fits(current, sentence):
            current = f"{current} {sentence}" if current else sentence
            continue
        # The sentence does not fit: close the chunk being built.
        if current:
            chunks.append(current)
            current = ""
        if len(sentence) <= limit:
            current = sentence
            continue
        # The sentence alone is longer than the limit: pack it word by word.
        for word in sentence.split():
            # Words longer than the limit are sliced so no chunk can overflow.
            for start in range(0, len(word), limit):
                piece = word[start:start + limit]
                if fits(current, piece):
                    current = f"{current} {piece}" if current else piece
                else:
                    # `current` is non-empty here: any piece fits an empty buffer.
                    chunks.append(current)
                    current = piece
    # Flush whatever is left in the buffer.
    if current:
        chunks.append(current)
    return chunks
@spaces.GPU
def generate_tts_audio(
    text_input: str,
    audio_prompt_path_input: str,
    exaggeration_input: float,
    temperature_input: float,
    seed_num_input: int,
    cfgw_input: float,
    chunk_size_input: int,
    progress=gr.Progress()
) -> tuple[int, np.ndarray]:
    """
    Synthesize long text by splitting it into chunks and joining the audio.

    All chunks are generated inside this one ``@spaces.GPU`` call. A chunk
    whose generation raises is logged and skipped, so the result may lack the
    audio for that chunk.

    Args:
        text_input: Text to speak.
        audio_prompt_path_input: Path of the reference audio passed to the
            model as ``audio_prompt_path``.
        exaggeration_input: Forwarded to the model as ``exaggeration``.
        temperature_input: Forwarded to the model as ``temperature``.
        seed_num_input: Random seed; ``0`` leaves the RNG state untouched.
        cfgw_input: Forwarded to the model as ``cfg_weight``.
        chunk_size_input: Maximum number of characters per chunk.
        progress: Gradio progress tracker, updated before each chunk.

    Returns:
        ``(sample_rate, waveform)`` with 0.2 s of silence between chunks.

    Raises:
        RuntimeError: If the model is not loaded or no chunk produced audio
            (including when the text is empty).
    """
    current_model = get_or_load_model()
    if current_model is None:
        raise RuntimeError("TTS model is not loaded.")

    if seed_num_input != 0:
        set_seed(int(seed_num_input))

    # Split the text into chunks.
    chunks = split_text_into_chunks(text_input, max_chars=chunk_size_input)
    total_chunks = len(chunks)
    print(f"텍스트를 {total_chunks}개의 청크로 분할했습니다.")

    # Generate audio for each chunk.
    audio_segments = []
    for i, chunk in enumerate(chunks):
        # Report the fraction already finished; the bar must not read 100%
        # while the last chunk is still being generated.
        progress(i / total_chunks, f"청크 {i + 1}/{total_chunks} 생성 중...")
        print(f"청크 {i + 1}/{total_chunks} 생성 중: '{chunk[:50]}...'")
        try:
            wav = current_model.generate(
                chunk,
                audio_prompt_path=audio_prompt_path_input,
                exaggeration=exaggeration_input,
                temperature=temperature_input,
                cfg_weight=cfgw_input,
            )
            # detach()/cpu() are no-ops for a plain CPU tensor and keep the
            # conversion valid should generate() return a GPU or grad tensor.
            audio_segments.append(wav.squeeze(0).detach().cpu().numpy())
        except Exception as e:
            # Best effort: log the failure and carry on with the other chunks.
            print(f"청크 {i + 1} 생성 중 오류 발생: {e}")
            continue

    if not audio_segments:
        raise RuntimeError("오디오 생성에 실패했습니다.")

    sample_rate = current_model.sr
    # 0.2 s pause between chunks, in the segments' own dtype so that
    # concatenation does not upcast the whole waveform to float64.
    silence = np.zeros(int(0.2 * sample_rate), dtype=audio_segments[0].dtype)
    pieces = []
    for segment in audio_segments:
        if pieces:  # silence goes between segments, never before the first
            pieces.append(silence)
        pieces.append(segment)
    concatenated_audio = np.concatenate(pieces)
    print(f"오디오 생성 완료. 총 길이: {len(concatenated_audio) / sample_rate:.2f}초")
    return (sample_rate, concatenated_audio)
# Thin wrapper for one-shot generation of a single chunk (carries its own GPU decorator).
@spaces.GPU
def generate_single_audio(
    text_input: str,
    audio_prompt_path_input: str,
    exaggeration_input: float,
    temperature_input: float,
    seed_num_input: int,
    cfgw_input: float
) -> tuple[int, np.ndarray]:
    """
    Generate TTS audio for one piece of text in a single model call.

    Only the first 300 characters of ``text_input`` are sent to the model.
    Returns ``(sample_rate, waveform)``; raises ``RuntimeError`` when the
    model is not loaded. A seed of ``0`` leaves the RNG state untouched.
    """
    tts = get_or_load_model()
    if tts is None:
        raise RuntimeError("TTS model is not loaded.")

    if seed_num_input != 0:
        set_seed(int(seed_num_input))

    print(f"Generating audio for text: '{text_input[:50]}...'")
    generation_options = {
        "audio_prompt_path": audio_prompt_path_input,
        "exaggeration": exaggeration_input,
        "temperature": temperature_input,
        "cfg_weight": cfgw_input,
    }
    # Cap the input at 300 characters as a safety limit.
    clipped_text = text_input[:300]
    waveform = tts.generate(clipped_text, **generation_options)
    print("Audio generation complete.")

    sample_rate = tts.sr
    samples = waveform.squeeze(0).numpy()
    return (sample_rate, samples)
# Labels of the two generation modes. The callbacks branch on these values,
# so each literal is defined once and shared with the Radio component.
MODE_SINGLE = "단일 생성 (300자 이하)"
MODE_CHUNKED = "청크 분할 (무제한)"
# Initial contents of the text box and initial position of the chunk-size slider.
DEFAULT_TEXT = "Now let's make my mum's favourite. So three mars bars into the pan. Then we add the tuna and just stir for a bit, just let the chocolate and fish infuse. A sprinkle of olive oil and some tomato ketchup. Now smell that. Oh boy this is going to be incredible."
DEFAULT_CHUNK_SIZE = 250
def update_char_count(text, chunk_size, mode):
    """Build the info line: character count plus, in chunked mode, the chunk count.

    In single mode a warning is appended when the text exceeds 300 characters,
    because only the first 300 are synthesized in that mode.
    """
    text = text or ""  # tolerate a cleared/None text box
    char_len = len(text)
    if mode == MODE_SINGLE:
        if char_len > 300:
            return f"{char_len} 문자 (⚠️ 300자 초과 - 잘릴 수 있음)"
        return f"{char_len} 문자"
    chunk_count = len(split_text_into_chunks(text, max_chars=chunk_size))
    return f"{char_len} 문자, 약 {chunk_count}개 청크로 분할됨"
def process_audio(text, ref_wav, exaggeration, temp, seed_num, cfg_weight, chunk_size, mode):
    """Route the request to single-shot or chunked generation based on ``mode``."""
    if mode == MODE_SINGLE:
        return generate_single_audio(text, ref_wav, exaggeration, temp, seed_num, cfg_weight)
    return generate_tts_audio(text, ref_wav, exaggeration, temp, seed_num, cfg_weight, chunk_size)
with gr.Blocks() as demo:
    gr.Markdown(
        """
        # Chatterbox TTS Demo - 무제한 길이 버전
        긴 텍스트도 청크로 나누어 처리하여 제한 없이 음성을 생성합니다.
        """
    )
    with gr.Row():
        # Left column: inputs and generation settings.
        with gr.Column():
            text = gr.Textbox(
                value=DEFAULT_TEXT,
                label="텍스트 입력 (길이 제한 없음)",
                lines=10,
                max_lines=30
            )
            ref_wav = gr.Audio(
                sources=["upload", "microphone"],
                type="filepath",
                label="Reference Audio File (Optional)",
                value="https://storage.googleapis.com/chatterbox-demo-samples/prompts/female_shadowheart4.flac"
            )
            with gr.Row():
                exaggeration = gr.Slider(
                    0.25, 2, step=.05,
                    label="Exaggeration (Neutral = 0.5)",
                    value=.5
                )
                cfg_weight = gr.Slider(
                    0.2, 1, step=.05,
                    label="CFG/Pace",
                    value=0.5
                )
            with gr.Row():
                chunk_size = gr.Slider(
                    100, 300, step=50,
                    label="청크 크기 (문자 수)",
                    value=DEFAULT_CHUNK_SIZE,
                    info="텍스트를 나눌 청크의 최대 크기입니다. 작을수록 더 자연스럽지만 처리 시간이 길어집니다."
                )
                mode = gr.Radio(
                    choices=[MODE_SINGLE, MODE_CHUNKED],
                    value=MODE_CHUNKED,
                    label="생성 모드"
                )
            with gr.Accordion("고급 옵션", open=False):
                seed_num = gr.Number(value=0, label="Random seed (0 for random)")
                temp = gr.Slider(0.05, 5, step=.05, label="Temperature", value=.8)
            run_btn = gr.Button("음성 생성", variant="primary")
        # Right column: generated audio and text statistics.
        with gr.Column():
            audio_output = gr.Audio(label="생성된 음성")
            # Text-length display. Computed from the actual defaults so the
            # box is correct on first render rather than claiming 0 characters.
            char_count = gr.Textbox(
                label="텍스트 정보",
                value=update_char_count(DEFAULT_TEXT, DEFAULT_CHUNK_SIZE, MODE_CHUNKED),
                interactive=False
            )
    # Refresh the character/chunk info whenever any of its three inputs changes.
    for watched in (text, chunk_size, mode):
        watched.change(
            fn=update_char_count,
            inputs=[text, chunk_size, mode],
            outputs=[char_count]
        )
    # The button dispatches to a different generator depending on the mode.
    run_btn.click(
        fn=process_audio,
        inputs=[
            text,
            ref_wav,
            exaggeration,
            temp,
            seed_num,
            cfg_weight,
            chunk_size,
            mode
        ],
        outputs=[audio_output],
    )
    gr.Markdown(
        """
        ### 사용 팁:
        - **단일 생성 모드**: 300자 이하의 짧은 텍스트에 적합하며 빠르게 생성됩니다
        - **청크 분할 모드**: 긴 텍스트를 자동으로 여러 부분으로 나누어 처리합니다
        - 청크 크기를 조절하여 품질과 속도의 균형을 맞출 수 있습니다
        - 각 청크 사이에는 자연스러운 전환을 위해 짧은 무음이 추가됩니다
        - 매우 긴 텍스트의 경우 처리 시간이 오래 걸릴 수 있습니다
        """
    )
demo.launch()