Spaces:

rwine
/

testpublic

Runtime error

App Files Files Community

testpublic / app.py

rwine

Update app.py

fe8a53b verified 4 months ago

raw

history blame

1.96 kB

	import inspect
	import subprocess
	import sys

	import gradio as gr
	import librosa
	import numpy as np
	import torch

	from inference import Mars5TTS, InferenceConfig

	# Best-effort check that everything listed in requirements.txt is installed.
	# NOTE(review): the third-party imports at the top of the file execute before
	# this step, so it cannot rescue a missing package on the first run - confirm
	# whether the install is still needed here.
	_pip_install = [sys.executable, "-m", "pip", "install", "-r", "requirements.txt"]
	try:
	    subprocess.check_call(_pip_install)
	except subprocess.CalledProcessError as e:
	    # A failed install is reported but does not stop start-up.
	    print(f"Failed to install requirements.txt: {e}")
	else:
	    print("Successfully installed requirements.txt")

	# Release any cached GPU memory before the model is loaded.
	if torch.cuda.is_available():
	    torch.cuda.empty_cache()


	def _load_mars5():
	    """Return the pretrained MARS5 TTS model and the inference config.

	    Any failure is printed and then re-raised, so start-up aborts instead of
	    continuing without a model.
	    """
	    try:
	        model = Mars5TTS.from_pretrained("CAMB-AI/MARS5-TTS")
	        inference_config = InferenceConfig(temperature=0.7)
	    except Exception as e:
	        print(f"Model loading error: {str(e)}")
	        raise
	    return model, inference_config


	# Load the MARS5 TTS model.
	mars5, config = _load_mars5()

	def clone_with_prosody(text, ref_audio, enhance_prosody=True):
	    """Synthesize `text` in the voice of `ref_audio` and return the output path.

	    Args:
	        text: Text to convert to speech.
	        ref_audio: Path of the reference recording (loaded with librosa at
	            16 kHz), or already-loaded audio data, which is handed to the
	            model unchanged.
	        enhance_prosody: When true, the module-level `config` is passed to
	            the model; otherwise `None` is passed.

	    Returns:
	        The path of the WAV file written by the model output.

	    Raises:
	        gr.Error: If the text is empty, no reference audio was supplied, or
	            loading / synthesis / saving fails. Raising (instead of returning
	            an "Error: ..." string) keeps the message out of the audio
	            output, where a string would be interpreted as a file path.
	    """
	    if not text or not text.strip():
	        raise gr.Error("Please enter the text to convert to speech.")
	    if ref_audio is None:
	        raise gr.Error("Please upload a reference audio clip.")

	    try:
	        if isinstance(ref_audio, str):
	            # Only the samples are needed; the rate is fixed by `sr=16000`.
	            audio_data, _ = librosa.load(ref_audio, sr=16000)
	        else:
	            audio_data = ref_audio

	        # NOTE(review): the keyword arguments below and the `.save()` call on
	        # the result cannot be verified from this file - confirm them against
	        # the signature and return value of `inference.Mars5TTS.tts`.
	        output_audio = mars5.tts(
	            text=text,
	            ref_audio=audio_data,
	            ref_sr=16000,
	            config=config if enhance_prosody else None,
	            language="ko",
	        )

	        output_path = "output_cloned_audio.wav"
	        output_audio.save(output_path)
	    except Exception as e:
	        raise gr.Error(f"Error: {str(e)}") from e

	    return output_path

	# Gradio 4 replaced gr.Audio's `source="upload"` keyword with
	# `sources=["upload"]`; passing the old spelling to a newer release raises a
	# TypeError at start-up. Use whichever keyword the installed version declares
	# so the upload-only restriction works on both.
	_audio_params = inspect.signature(gr.Audio.__init__).parameters
	if "sources" in _audio_params:
	    _upload_only = {"sources": ["upload"]}
	else:
	    _upload_only = {"source": "upload"}

	# Web UI: text + reference clip + prosody toggle in, cloned audio out.
	interface = gr.Interface(
	    fn=clone_with_prosody,
	    inputs=[
	        gr.Textbox(label="Text to Convert", placeholder="Enter text to convert to speech"),
	        gr.Audio(label="Reference Audio (Your Voice)", type="filepath", **_upload_only),
	        gr.Checkbox(label="Enhance Prosody (Intonation/Rhythm)", value=True),
	    ],
	    outputs=gr.Audio(label="Cloned Voice Output"),
	    title="MARS5 Voice Cloner with Prosody",
	    description="Upload a 3-5 second audio of your voice and enter text to clone your voice with prosody (intonation, rhythm, emotion).",
	    allow_flagging="never",
	)

	interface.launch()