import wave

import gradio as gr
import librosa
import numpy as np
import torch

from inference import Mars5TTS, InferenceConfig as config_class

# Release cached GPU memory up front to reduce the chance of running out of
# memory on small (free-tier) GPUs.
if torch.cuda.is_available():
    torch.cuda.empty_cache()

# Load the MARS5 TTS model and build the default inference config.
# Both names are pre-bound to None so that a failed load leaves them defined
# (and detectable) instead of surfacing later as a NameError.
mars5 = None
config = None
try:
    mars5 = Mars5TTS.from_pretrained("CAMB-AI/MARS5-TTS")
    # `InferenceConfig` is imported under the alias `config_class`; the bare
    # name `InferenceConfig` is not in scope in this module. Temperature 0.7
    # is the sampling temperature chosen for prosody control.
    config = config_class(temperature=0.7)
except Exception as e:
    print(f"Model loading error: {str(e)}")

# Sample rate assumed for a bare NumPy reference array that carries no rate of
# its own (the rate this function historically assumed for every input).
_FALLBACK_REF_SR = 16000
# Sampling temperature applied when prosody enhancement is requested.
_PROSODY_TEMPERATURE = 0.7
_OUTPUT_PATH = "output_cloned_audio.wav"


def _load_reference(ref_audio, target_sr):
    """Return the reference clip as a mono float32 array at ``target_sr`` Hz."""
    if isinstance(ref_audio, str):
        # librosa resamples and down-mixes while decoding the file.
        samples, _ = librosa.load(ref_audio, sr=target_sr, mono=True)
        return samples

    # Gradio's ``type="numpy"`` audio arrives as ``(sample_rate, data)``.
    if isinstance(ref_audio, tuple):
        source_sr, samples = ref_audio
    else:
        source_sr, samples = _FALLBACK_REF_SR, ref_audio

    samples = np.asarray(samples)
    if np.issubdtype(samples.dtype, np.integer):
        # Integer PCM -> float in [-1, 1].
        scale = float(np.iinfo(samples.dtype).max)
        samples = samples.astype(np.float32) / scale
    else:
        samples = samples.astype(np.float32)
    if samples.ndim > 1:
        samples = samples.mean(axis=-1)  # (samples, channels) -> mono
    if source_sr != target_sr:
        samples = librosa.resample(samples, orig_sr=source_sr, target_sr=target_sr)
    return samples


def _write_wav(path, samples, sample_rate):
    """Write mono float samples in [-1, 1] to ``path`` as 16-bit PCM WAV."""
    pcm = (np.clip(samples, -1.0, 1.0) * 32767.0).astype("<i2")
    with wave.open(path, "wb") as wav_file:
        wav_file.setnchannels(1)
        wav_file.setsampwidth(2)
        wav_file.setframerate(int(sample_rate))
        wav_file.writeframes(pcm.tobytes())


def clone_with_prosody(text, ref_audio, enhance_prosody=True):
    """
    Synthesize ``text`` in the voice of ``ref_audio`` and return a WAV path.

    :param text: Text to convert to speech.
    :param ref_audio: Reference clip whose voice is cloned: a file path (what
        the Gradio ``type="filepath"`` input supplies), a ``(sample_rate,
        array)`` tuple, or a bare NumPy array (assumed to be 16 kHz).
        A clip of roughly 3-5 seconds or longer is recommended.
    :param enhance_prosody: When true, sample with the prosody temperature
        (0.7); otherwise use the library's default inference settings.
    :return: Path of the generated WAV file.
    :raises gr.Error: If an input is missing or synthesis fails, so the UI
        shows the message instead of treating it as an audio file path.
    """
    if not text or not text.strip():
        raise gr.Error("Please enter the text to convert.")
    if ref_audio is None:
        raise gr.Error("Please upload a reference audio clip.")

    try:
        if mars5 is None:
            raise RuntimeError("MARS5 model is not loaded")

        # MARS5 expects the reference as a tensor at the model's own rate.
        model_sr = mars5.sr
        reference = torch.from_numpy(_load_reference(ref_audio, model_sr))

        # Build a fresh config per request. The UI collects no transcript of
        # the reference clip, which deep cloning requires, so use shallow
        # cloning (deep_clone=False).
        cfg_kwargs = {"deep_clone": False}
        if enhance_prosody:
            cfg_kwargs["temperature"] = _PROSODY_TEMPERATURE
        cfg = config_class(**cfg_kwargs)

        # NOTE(review): `tts` takes no language argument; the public MARS5
        # checkpoint is documented as English -- confirm quality for Korean.
        _, output_audio = mars5.tts(text, reference, cfg=cfg)

        # `output_audio` is a 1-D float tensor at the model sample rate.
        waveform = output_audio.detach().cpu().numpy().reshape(-1)
        _write_wav(_OUTPUT_PATH, waveform, model_sr)
        return _OUTPUT_PATH
    except Exception as e:
        raise gr.Error(f"Error: {str(e)}") from e

# Gradio UI: three inputs (text, reference clip, prosody toggle) feed
# `clone_with_prosody`, whose result is shown in a single audio player.
text_box = gr.Textbox(
    label="Text to Convert",
    placeholder="Enter text to convert to speech",
)
# NOTE(review): `source=` is the Gradio 3.x spelling; Gradio 4 renamed it to
# `sources=[...]` -- confirm against the pinned Gradio version.
reference_clip = gr.Audio(
    label="Reference Audio (Your Voice)",
    type="filepath",
    source="upload",
)
prosody_toggle = gr.Checkbox(
    label="Enhance Prosody (Intonation/Rhythm)",
    value=True,
)
cloned_output = gr.Audio(label="Cloned Voice Output")

interface = gr.Interface(
    fn=clone_with_prosody,
    inputs=[text_box, reference_clip, prosody_toggle],
    outputs=cloned_output,
    title="MARS5 Voice Cloner with Prosody",
    description="Upload a 3-5 second audio of your voice and enter text to clone your voice with prosody (intonation, rhythm, emotion).",
    allow_flagging="never",
)

# Start the app.
interface.launch()