Spaces:
Build error
Build error
Add Application file
Browse files- .history/Dockerfile_20250614014624 +0 -0
- .history/Dockerfile_20250614014629 +34 -0
- .history/app_20250614014519.py +0 -0
- .history/app_20250614014524.py +55 -0
- .history/model/requirements_20250504154654.txt +40 -0
- .history/model/requirements_20250614014735.txt +42 -0
- Dockerfile +34 -0
- app.py +55 -0
- model/requirements.txt +2 -0
.history/Dockerfile_20250614014624
ADDED
|
File without changes
|
.history/Dockerfile_20250614014629
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# ๋ฒ ์ด์ค ์ด๋ฏธ์ง ์ ํ (PyTorch + CUDA ์์ด๋ ๊ฐ๋ฅ)
|
| 2 |
+
FROM python:3.10-slim
|
| 3 |
+
|
| 4 |
+
# ํ์ ํจํค์ง ์ค์น
|
| 5 |
+
RUN apt-get update && \
|
| 6 |
+
apt-get install -y git unzip ffmpeg && \
|
| 7 |
+
apt-get clean && \
|
| 8 |
+
rm -rf /var/lib/apt/lists/*
|
| 9 |
+
|
| 10 |
+
# ์์
๋๋ ํ ๋ฆฌ ์์ฑ
|
| 11 |
+
WORKDIR /app
|
| 12 |
+
|
| 13 |
+
# requirements.txt ๋ณต์ฌ ๋ฐ ์ค์น
|
| 14 |
+
COPY requirements.txt .
|
| 15 |
+
RUN pip install --upgrade pip
|
| 16 |
+
RUN pip install -r requirements.txt
|
| 17 |
+
|
| 18 |
+
# Matcha-TTS์ cosyvoice ์ฝ๋ ๋ณต์ฌ
|
| 19 |
+
COPY . .
|
| 20 |
+
|
| 21 |
+
# ๋ฆฌ์์ค ๋ค์ด๋ก๋ ๋ฐ ์์ถ ํด์
|
| 22 |
+
RUN python -c "\
|
| 23 |
+
from modelscope import snapshot_download;\
|
| 24 |
+
snapshot_download('iic/CosyVoice2-0.5B', local_dir='pretrained_models/CosyVoice2-0.5B');\
|
| 25 |
+
snapshot_download('iic/CosyVoice-ttsfrd', local_dir='pretrained_models/CosyVoice-ttsfrd')"
|
| 26 |
+
|
| 27 |
+
RUN unzip -o pretrained_models/CosyVoice-ttsfrd/resource.zip -d pretrained_models/CosyVoice-ttsfrd || echo "resource.zip not found"
|
| 28 |
+
|
| 29 |
+
# .whl ํ์ผ ์ค์น
|
| 30 |
+
RUN pip install pretrained_models/CosyVoice-ttsfrd/ttsfrd_dependency-0.1-py3-none-any.whl || echo "dep whl missing"
|
| 31 |
+
RUN pip install pretrained_models/CosyVoice-ttsfrd/ttsfrd-0.4.2-cp310-cp310-linux_x86_64.whl || echo "core whl missing"
|
| 32 |
+
|
| 33 |
+
# Gradio app ์คํ (Hugging Face Spaces ๊ธฐ์ค)
|
| 34 |
+
CMD ["python", "app.py"]
|
.history/app_20250614014519.py
ADDED
|
File without changes
|
.history/app_20250614014524.py
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# app.py
|
| 2 |
+
|
| 3 |
+
import gradio as gr
|
| 4 |
+
from cosyvoice.cli.cosyvoice import CosyVoice2
|
| 5 |
+
from cosyvoice.utils.file_utils import load_wav
|
| 6 |
+
import torchaudio
|
| 7 |
+
import os
|
| 8 |
+
|
| 9 |
+
# ๋ชจ๋ธ ์ด๊ธฐํ
|
| 10 |
+
cosyvoice = CosyVoice2(
|
| 11 |
+
'pretrained_models/CosyVoice2-0.5B',
|
| 12 |
+
load_jit=False,
|
| 13 |
+
load_trt=False,
|
| 14 |
+
fp16=False,
|
| 15 |
+
use_flow_cache=False
|
| 16 |
+
)
|
| 17 |
+
|
| 18 |
+
def infer(text, prompt_text, prompt_wav):
|
| 19 |
+
if prompt_wav is None:
|
| 20 |
+
return "ํ๋กฌํํธ ์์ฑ์ ์
๋ก๋ํด์ฃผ์ธ์.", None
|
| 21 |
+
|
| 22 |
+
# ์
๋ก๋๋ ํ์ผ ๊ฒฝ๋ก์์ 16kHz๋ก ๋ก๋
|
| 23 |
+
prompt_speech_16k = load_wav(prompt_wav, 16000)
|
| 24 |
+
|
| 25 |
+
# ์์ฑ ํฉ์ฑ ์คํ
|
| 26 |
+
results = cosyvoice.inference_zero_shot(
|
| 27 |
+
text,
|
| 28 |
+
prompt_text=prompt_text,
|
| 29 |
+
prompt_speech_16k=prompt_speech_16k,
|
| 30 |
+
text_frontend=True
|
| 31 |
+
)
|
| 32 |
+
|
| 33 |
+
# ๊ฒฐ๊ณผ ์ ์ฅ
|
| 34 |
+
output_path = f'korean_tts_output.wav'
|
| 35 |
+
torchaudio.save(output_path, results[0]['tts_speech'], cosyvoice.sample_rate)
|
| 36 |
+
|
| 37 |
+
return "ํฉ์ฑ ์๋ฃ!", output_path
|
| 38 |
+
|
| 39 |
+
iface = gr.Interface(
|
| 40 |
+
fn=infer,
|
| 41 |
+
inputs=[
|
| 42 |
+
gr.Textbox(label="TTSํ ํ
์คํธ", lines=2, placeholder="์: ๊ณต๋ฃก์ด ๋ฐค์๊ฐฑ์ ๋ชฐ๋ ๋จน๊ณ ๋๋ง์ณค์ด์."),
|
| 43 |
+
gr.Textbox(label="ํ๋กฌํํธ ๋ฌธ์ฅ ํ
์คํธ", lines=1, placeholder="์: ์ค๋๋ฅธ ์ปคํผ ์ ๋ง์ค ๊บผ์ผ"),
|
| 44 |
+
gr.Audio(label="ํ๋กฌํํธ ์์ฑ (wav)", type="filepath")
|
| 45 |
+
],
|
| 46 |
+
outputs=[
|
| 47 |
+
gr.Text(label="์ํ"),
|
| 48 |
+
gr.Audio(label="ํฉ์ฑ๋ ์์ฑ")
|
| 49 |
+
],
|
| 50 |
+
title="CosyVoice2 ์์ฑ ํฉ์ฑ๊ธฐ",
|
| 51 |
+
description="์ง์ ํ๋กฌํํธ ์์ฑ์ ์
๋ก๋ํ๊ณ TTS ํ
์คํธ๋ฅผ ์
๋ ฅํด๋ณด์ธ์!"
|
| 52 |
+
)
|
| 53 |
+
|
| 54 |
+
if __name__ == "__main__":
|
| 55 |
+
iface.launch()
|
.history/model/requirements_20250504154654.txt
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
--extra-index-url https://download.pytorch.org/whl/cu121
|
| 2 |
+
--extra-index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/onnxruntime-cuda-12/pypi/simple/ # https://github.com/microsoft/onnxruntime/issues/21684
|
| 3 |
+
conformer==0.3.2
|
| 4 |
+
deepspeed==0.14.2; sys_platform == 'linux'
|
| 5 |
+
diffusers==0.29.0
|
| 6 |
+
gdown==5.1.0
|
| 7 |
+
gradio==5.4.0
|
| 8 |
+
grpcio==1.57.0
|
| 9 |
+
grpcio-tools==1.57.0
|
| 10 |
+
hydra-core==1.3.2
|
| 11 |
+
HyperPyYAML==1.2.2
|
| 12 |
+
inflect==7.3.1
|
| 13 |
+
librosa==0.10.2
|
| 14 |
+
lightning==2.2.4
|
| 15 |
+
matplotlib==3.7.5
|
| 16 |
+
modelscope==1.20.0
|
| 17 |
+
networkx==3.1
|
| 18 |
+
omegaconf==2.3.0
|
| 19 |
+
onnx==1.16.0
|
| 20 |
+
onnxruntime-gpu==1.18.0; sys_platform == 'linux'
|
| 21 |
+
onnxruntime==1.18.0; sys_platform == 'darwin' or sys_platform == 'win32'
|
| 22 |
+
openai-whisper==20231117
|
| 23 |
+
protobuf==4.25
|
| 24 |
+
pyarrow==18.1.0
|
| 25 |
+
pydantic==2.7.0
|
| 26 |
+
pyworld==0.3.4
|
| 27 |
+
rich==13.7.1
|
| 28 |
+
soundfile==0.12.1
|
| 29 |
+
tensorboard==2.14.0
|
| 30 |
+
tensorrt-cu12==10.0.1; sys_platform == 'linux'
|
| 31 |
+
tensorrt-cu12-bindings==10.0.1; sys_platform == 'linux'
|
| 32 |
+
tensorrt-cu12-libs==10.0.1; sys_platform == 'linux'
|
| 33 |
+
torch==2.3.1
|
| 34 |
+
torchaudio==2.3.1
|
| 35 |
+
transformers==4.40.1
|
| 36 |
+
uvicorn==0.30.0
|
| 37 |
+
wget==3.2
|
| 38 |
+
fastapi==0.115.6
|
| 39 |
+
fastapi-cli==0.0.4
|
| 40 |
+
WeTextProcessing==1.0.3
|
.history/model/requirements_20250614014735.txt
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
--extra-index-url https://download.pytorch.org/whl/cu121
|
| 2 |
+
--extra-index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/onnxruntime-cuda-12/pypi/simple/ # https://github.com/microsoft/onnxruntime/issues/21684
|
| 3 |
+
conformer==0.3.2
|
| 4 |
+
deepspeed==0.14.2; sys_platform == 'linux'
|
| 5 |
+
diffusers==0.29.0
|
| 6 |
+
gdown==5.1.0
|
| 7 |
+
gradio==5.4.0
|
| 8 |
+
grpcio==1.57.0
|
| 9 |
+
grpcio-tools==1.57.0
|
| 10 |
+
hydra-core==1.3.2
|
| 11 |
+
HyperPyYAML==1.2.2
|
| 12 |
+
inflect==7.3.1
|
| 13 |
+
librosa==0.10.2
|
| 14 |
+
lightning==2.2.4
|
| 15 |
+
matplotlib==3.7.5
|
| 16 |
+
modelscope==1.20.0
|
| 17 |
+
networkx==3.1
|
| 18 |
+
omegaconf==2.3.0
|
| 19 |
+
onnx==1.16.0
|
| 20 |
+
onnxruntime-gpu==1.18.0; sys_platform == 'linux'
|
| 21 |
+
onnxruntime==1.18.0; sys_platform == 'darwin' or sys_platform == 'win32'
|
| 22 |
+
openai-whisper==20231117
|
| 23 |
+
protobuf==4.25
|
| 24 |
+
pyarrow==18.1.0
|
| 25 |
+
pydantic==2.7.0
|
| 26 |
+
pyworld==0.3.4
|
| 27 |
+
rich==13.7.1
|
| 28 |
+
soundfile==0.12.1
|
| 29 |
+
tensorboard==2.14.0
|
| 30 |
+
tensorrt-cu12==10.0.1; sys_platform == 'linux'
|
| 31 |
+
tensorrt-cu12-bindings==10.0.1; sys_platform == 'linux'
|
| 32 |
+
tensorrt-cu12-libs==10.0.1; sys_platform == 'linux'
|
| 33 |
+
torch==2.3.1
|
| 34 |
+
torchaudio==2.3.1
|
| 35 |
+
transformers==4.40.1
|
| 36 |
+
uvicorn==0.30.0
|
| 37 |
+
wget==3.2
|
| 38 |
+
fastapi==0.115.6
|
| 39 |
+
fastapi-cli==0.0.4
|
| 40 |
+
WeTextProcessing==1.0.3
|
| 41 |
+
fastapi
|
| 42 |
+
uvicorn[standard]
|
Dockerfile
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Base image: plain CPython 3.10 (works without a PyTorch + CUDA base image).
FROM python:3.10-slim

# Gradio binds to 127.0.0.1 unless told otherwise, which is unreachable from
# outside the container. Hugging Face Docker Spaces route traffic to port 7860
# by default, so listen on all interfaces on that port.
ENV GRADIO_SERVER_NAME=0.0.0.0 \
    GRADIO_SERVER_PORT=7860

# Install the required system packages.
RUN apt-get update && \
    apt-get install -y git unzip ffmpeg && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

# Create the working directory.
WORKDIR /app

# Copy requirements.txt and install the Python dependencies.
# NOTE(review): this commit only touches model/requirements.txt - confirm that a
# requirements.txt also exists at the root of the build context, otherwise this
# COPY fails the build.
COPY requirements.txt .
# --no-cache-dir keeps pip's download cache out of the image layers.
RUN pip install --no-cache-dir --upgrade pip
RUN pip install --no-cache-dir -r requirements.txt

# Copy the Matcha-TTS and cosyvoice code.
COPY . .

# Download the model resources and unpack them.
RUN python -c "\
from modelscope import snapshot_download;\
snapshot_download('iic/CosyVoice2-0.5B', local_dir='pretrained_models/CosyVoice2-0.5B');\
snapshot_download('iic/CosyVoice-ttsfrd', local_dir='pretrained_models/CosyVoice-ttsfrd')"

# Best effort: the build continues (with a message) when the archive is missing.
RUN unzip -o pretrained_models/CosyVoice-ttsfrd/resource.zip -d pretrained_models/CosyVoice-ttsfrd || echo "resource.zip not found"

# Install the .whl files (best effort, same as above).
RUN pip install --no-cache-dir pretrained_models/CosyVoice-ttsfrd/ttsfrd_dependency-0.1-py3-none-any.whl || echo "dep whl missing"
RUN pip install --no-cache-dir pretrained_models/CosyVoice-ttsfrd/ttsfrd-0.4.2-cp310-cp310-linux_x86_64.whl || echo "core whl missing"

# Run the Gradio app (Hugging Face Spaces convention).
EXPOSE 7860
CMD ["python", "app.py"]
|
app.py
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# app.py
|
| 2 |
+
|
| 3 |
+
import gradio as gr
|
| 4 |
+
from cosyvoice.cli.cosyvoice import CosyVoice2
|
| 5 |
+
from cosyvoice.utils.file_utils import load_wav
|
| 6 |
+
import torchaudio
|
| 7 |
+
import os
|
| 8 |
+
|
| 9 |
+
# Model initialization: build the CosyVoice2 instance once at import time from
# the local checkpoint directory, with JIT, TensorRT, fp16 and the flow cache
# all switched off.
cosyvoice = CosyVoice2(
    'pretrained_models/CosyVoice2-0.5B',
    load_jit=False, load_trt=False, fp16=False, use_flow_cache=False,
)
|
| 17 |
+
|
| 18 |
+
def infer(text, prompt_text, prompt_wav):
    """Synthesize ``text`` in the voice of an uploaded prompt recording.

    Args:
        text: Text to synthesize.
        prompt_text: Transcript of the prompt recording.
        prompt_wav: Path to the prompt audio file, or ``None`` when nothing
            was uploaded.

    Returns:
        A ``(status, audio_path)`` tuple. ``audio_path`` is ``None`` when no
        audio was produced.
    """
    if prompt_wav is None:
        return "프롬프트 음성을 업로드해주세요.", None

    # Imported locally so this function does not rely on extra module-level
    # imports; both are only needed once real synthesis work starts.
    import tempfile

    import torch

    # Load the uploaded file from its path, resampled to 16 kHz.
    prompt_speech_16k = load_wav(prompt_wav, 16000)

    # Run the zero-shot synthesis.
    results = cosyvoice.inference_zero_shot(
        text,
        prompt_text=prompt_text,
        prompt_speech_16k=prompt_speech_16k,
        text_frontend=True,
    )

    # Upstream CosyVoice yields one dict per synthesized segment, so the result
    # cannot be indexed with [0]; iterating works for a generator and a list
    # alike and keeps every segment instead of only the first one.
    segments = [item['tts_speech'] for item in results]
    if not segments:
        return "합성된 음성이 없습니다.", None

    # assumes each segment is (channels, samples), the layout torchaudio.save
    # expects, so segments are joined along the last axis - TODO confirm
    speech = torch.cat(segments, dim=-1)

    # A unique temp file per request: a fixed name in the working directory is
    # overwritten by concurrent requests and may not be writable in a container.
    with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as handle:
        output_path = handle.name
    torchaudio.save(output_path, speech, cosyvoice.sample_rate)

    return "합성 완료!", output_path
|
| 38 |
+
|
| 39 |
+
iface = gr.Interface(
|
| 40 |
+
fn=infer,
|
| 41 |
+
inputs=[
|
| 42 |
+
gr.Textbox(label="TTSํ ํ
์คํธ", lines=2, placeholder="์: ๊ณต๋ฃก์ด ๋ฐค์๊ฐฑ์ ๋ชฐ๋ ๋จน๊ณ ๋๋ง์ณค์ด์."),
|
| 43 |
+
gr.Textbox(label="ํ๋กฌํํธ ๋ฌธ์ฅ ํ
์คํธ", lines=1, placeholder="์: ์ค๋๋ฅธ ์ปคํผ ์ ๋ง์ค ๊บผ์ผ"),
|
| 44 |
+
gr.Audio(label="ํ๋กฌํํธ ์์ฑ (wav)", type="filepath")
|
| 45 |
+
],
|
| 46 |
+
outputs=[
|
| 47 |
+
gr.Text(label="์ํ"),
|
| 48 |
+
gr.Audio(label="ํฉ์ฑ๋ ์์ฑ")
|
| 49 |
+
],
|
| 50 |
+
title="CosyVoice2 ์์ฑ ํฉ์ฑ๊ธฐ",
|
| 51 |
+
description="์ง์ ํ๋กฌํํธ ์์ฑ์ ์
๋ก๋ํ๊ณ TTS ํ
์คํธ๋ฅผ ์
๋ ฅํด๋ณด์ธ์!"
|
| 52 |
+
)
|
| 53 |
+
|
| 54 |
+
if __name__ == "__main__":
    # Gradio's default bind address is 127.0.0.1, which cannot be reached from
    # outside a container. Listen on all interfaces on port 7860 (the default
    # port a Hugging Face Docker Space forwards to), while still letting the
    # standard GRADIO_SERVER_* environment variables override both values.
    iface.launch(
        server_name=os.environ.get("GRADIO_SERVER_NAME", "0.0.0.0"),
        server_port=int(os.environ.get("GRADIO_SERVER_PORT", "7860")),
    )
|
model/requirements.txt
CHANGED
|
@@ -38,3 +38,5 @@ wget==3.2
|
|
| 38 |
fastapi==0.115.6
|
| 39 |
fastapi-cli==0.0.4
|
| 40 |
WeTextProcessing==1.0.3
|
|
|
|
|
|
|
|
|
| 38 |
fastapi==0.115.6
|
| 39 |
fastapi-cli==0.0.4
|
| 40 |
WeTextProcessing==1.0.3
|
| 41 |
+
fastapi
|
| 42 |
+
uvicorn[standard]
|