Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
from mars5 import Mars5TTS
|
| 3 |
+
import torch
|
| 4 |
+
import numpy as np
|
| 5 |
+
|
| 6 |
+
# Load the MARS5 TTS model (from the Hugging Face Hub)
|
| 7 |
+
# Module-level instance shared by every call to clone_with_prosody, so the
# model is constructed once when this script starts rather than per request.
# NOTE(review): presumably this fetches/caches the checkpoint for the given
# Hub repo id on first run — confirm against the mars5 package in use.
model = Mars5TTS.from_pretrained("camb-ai/mars5-tts")
|
| 8 |
+
|
| 9 |
+
def clone_with_prosody(text, ref_audio, enhance_prosody=True):
    """
    Synthesize ``text`` in the speaking style of a reference recording.

    :param text: Text to convert to speech.
    :param ref_audio: Reference audio whose speaking style is cloned. It is
        handed to ``model.tts`` unchanged (3-5 seconds or longer recommended).
    :param enhance_prosody: Whether to emphasize prosody (intonation/rhythm).
    :return: Path of the WAV file that holds the generated audio.
    :raises gr.Error: If an input is missing, or if synthesis or saving fails.
    """
    import tempfile

    # Reject unusable input up front so the user gets a clear message instead
    # of whatever exception the model would raise on it.
    if not text or not text.strip():
        raise gr.Error("Error: no text was provided")
    if ref_audio is None:
        raise gr.Error("Error: no reference audio was provided")

    try:
        # NOTE(review): the keyword names below are kept exactly as the
        # original call had them; confirm that the installed Mars5TTS.tts
        # really accepts `prosody_enhance` and `language`.
        output_audio = model.tts(
            text=text,
            ref_audio=ref_audio,
            prosody_enhance=enhance_prosody,  # stronger intonation/rhythm cloning
            language="ko",  # Korean
        )

        # Use a unique file per call: a single fixed filename would be
        # overwritten when two requests are processed at the same time.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as handle:
            output_path = handle.name
        output_audio.save(output_path)
    except Exception as e:
        # The result feeds an audio output component, so returning an error
        # string would be treated as a file path. Raise a Gradio error so the
        # message is shown to the user instead.
        raise gr.Error(f"Error: {e}") from e

    return output_path
|
| 40 |
+
|
| 41 |
+
# Gradio interface configuration.
# Input components, in the positional order clone_with_prosody expects:
# text, reference audio, prosody toggle.
# NOTE(review): `source="upload"` and `allow_flagging=` are version-dependent
# Gradio keywords (newer releases use `sources=[...]` / `flagging_mode=`);
# confirm against the Gradio version this Space pins.
_input_components = [
    gr.Textbox(
        label="Text to Convert",
        placeholder="Enter text to convert to speech",
    ),
    gr.Audio(
        label="Reference Audio (Your Voice)",
        type="filepath",
        source="upload",
    ),
    gr.Checkbox(
        label="Enhance Prosody (Intonation/Rhythm)",
        value=True,
    ),
]

# Single audio player that receives the value returned by clone_with_prosody.
_output_component = gr.Audio(label="Cloned Voice Output")

interface = gr.Interface(
    fn=clone_with_prosody,
    inputs=_input_components,
    outputs=_output_component,
    title="MARS5 Voice Cloner with Prosody",
    description="Upload a 3-5 second audio of your voice and enter text to clone your voice with prosody (intonation, rhythm, emotion).",
    allow_flagging="never",
)

# Start the app.
interface.launch()
|