Spaces:

speako
/

cosyvoice2-server

Build error

App Files Files Community

parkjihye committed on Jun 13, 2025

Commit

98f15bb

1 Parent(s): 495bed3

Add Application file

Browse files

Files changed (9) hide show

.history/Dockerfile_20250614014624 +0 -0
.history/Dockerfile_20250614014629 +34 -0
.history/app_20250614014519.py +0 -0
.history/app_20250614014524.py +55 -0
.history/model/requirements_20250504154654.txt +40 -0
.history/model/requirements_20250614014735.txt +42 -0
Dockerfile +34 -0
app.py +55 -0
model/requirements.txt +2 -0

.history/Dockerfile_20250614014624 ADDED Viewed

File without changes

.history/Dockerfile_20250614014629 ADDED Viewed

	@@ -0,0 +1,34 @@

+# Choose the base image (works without a PyTorch + CUDA base)
+FROM python:3.10-slim
+# Install required system packages
+RUN apt-get update && \
+    apt-get install -y git unzip ffmpeg && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+# Create the working directory
+WORKDIR /app
+# Copy requirements.txt and install it
+COPY requirements.txt .
+RUN pip install --upgrade pip
+RUN pip install -r requirements.txt
+# Copy the Matcha-TTS and cosyvoice code
+COPY . .
+# Download the resources and unpack them
+RUN python -c "\
+from modelscope import snapshot_download;\
+snapshot_download('iic/CosyVoice2-0.5B', local_dir='pretrained_models/CosyVoice2-0.5B');\
+snapshot_download('iic/CosyVoice-ttsfrd', local_dir='pretrained_models/CosyVoice-ttsfrd')"
+RUN unzip -o pretrained_models/CosyVoice-ttsfrd/resource.zip -d pretrained_models/CosyVoice-ttsfrd || echo "resource.zip not found"
+# Install the .whl files (best effort: a failed install only prints a message)
+RUN pip install pretrained_models/CosyVoice-ttsfrd/ttsfrd_dependency-0.1-py3-none-any.whl || echo "dep whl missing"
+RUN pip install pretrained_models/CosyVoice-ttsfrd/ttsfrd-0.4.2-cp310-cp310-linux_x86_64.whl || echo "core whl missing"
+# Run the Gradio app (targeting Hugging Face Spaces)
+CMD ["python", "app.py"]

.history/app_20250614014519.py ADDED Viewed

File without changes

.history/app_20250614014524.py ADDED Viewed

	@@ -0,0 +1,55 @@

+# app.py
+import gradio as gr
+from cosyvoice.cli.cosyvoice import CosyVoice2
+from cosyvoice.utils.file_utils import load_wav
+import torchaudio
+import os
+# Initialize the model
+cosyvoice = CosyVoice2(
+    'pretrained_models/CosyVoice2-0.5B',
+    load_jit=False,
+    load_trt=False,
+    fp16=False,
+    use_flow_cache=False
+)
+def infer(text, prompt_text, prompt_wav):
+    if prompt_wav is None:
+        return "프롬프트 음성을 업로드해주세요.", None
+    # Load the uploaded file from its path at 16 kHz
+    prompt_speech_16k = load_wav(prompt_wav, 16000)
+    # Run speech synthesis
+    results = cosyvoice.inference_zero_shot(
+        text,
+        prompt_text=prompt_text,
+        prompt_speech_16k=prompt_speech_16k,
+        text_frontend=True
+    )
+    # Save the result
+    output_path = f'korean_tts_output.wav'
+    torchaudio.save(output_path, results[0]['tts_speech'], cosyvoice.sample_rate)
+    return "합성 완료!", output_path
+iface = gr.Interface(
+    fn=infer,
+    inputs=[
+        gr.Textbox(label="TTS할 텍스트", lines=2, placeholder="예: 공룡이 밤양갱을 몰래 먹고 도망쳤어요."),
+        gr.Textbox(label="프롬프트 문장 텍스트", lines=1, placeholder="예: 오느른 커피 안 마실 꺼야"),
+        gr.Audio(label="프롬프트 음성 (wav)", type="filepath")
+    ],
+    outputs=[
+        gr.Text(label="상태"),
+        gr.Audio(label="합성된 음성")
+    ],
+    title="CosyVoice2 음성 합성기",
+    description="직접 프롬프트 음성을 업로드하고 TTS 텍스트를 입력해보세요!"
+)
+if __name__ == "__main__":
+    iface.launch()

.history/model/requirements_20250504154654.txt ADDED Viewed

	@@ -0,0 +1,40 @@

+--extra-index-url https://download.pytorch.org/whl/cu121
+--extra-index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/onnxruntime-cuda-12/pypi/simple/ # https://github.com/microsoft/onnxruntime/issues/21684
+conformer==0.3.2
+deepspeed==0.14.2; sys_platform == 'linux'
+diffusers==0.29.0
+gdown==5.1.0
+gradio==5.4.0
+grpcio==1.57.0
+grpcio-tools==1.57.0
+hydra-core==1.3.2
+HyperPyYAML==1.2.2
+inflect==7.3.1
+librosa==0.10.2
+lightning==2.2.4
+matplotlib==3.7.5
+modelscope==1.20.0
+networkx==3.1
+omegaconf==2.3.0
+onnx==1.16.0
+onnxruntime-gpu==1.18.0; sys_platform == 'linux'
+onnxruntime==1.18.0; sys_platform == 'darwin' or sys_platform == 'win32'
+openai-whisper==20231117
+protobuf==4.25
+pyarrow==18.1.0
+pydantic==2.7.0
+pyworld==0.3.4
+rich==13.7.1
+soundfile==0.12.1
+tensorboard==2.14.0
+tensorrt-cu12==10.0.1; sys_platform == 'linux'
+tensorrt-cu12-bindings==10.0.1; sys_platform == 'linux'
+tensorrt-cu12-libs==10.0.1; sys_platform == 'linux'
+torch==2.3.1
+torchaudio==2.3.1
+transformers==4.40.1
+uvicorn==0.30.0
+wget==3.2
+fastapi==0.115.6
+fastapi-cli==0.0.4
+WeTextProcessing==1.0.3

.history/model/requirements_20250614014735.txt ADDED Viewed

	@@ -0,0 +1,42 @@

+--extra-index-url https://download.pytorch.org/whl/cu121
+--extra-index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/onnxruntime-cuda-12/pypi/simple/ # https://github.com/microsoft/onnxruntime/issues/21684
+conformer==0.3.2
+deepspeed==0.14.2; sys_platform == 'linux'
+diffusers==0.29.0
+gdown==5.1.0
+gradio==5.4.0
+grpcio==1.57.0
+grpcio-tools==1.57.0
+hydra-core==1.3.2
+HyperPyYAML==1.2.2
+inflect==7.3.1
+librosa==0.10.2
+lightning==2.2.4
+matplotlib==3.7.5
+modelscope==1.20.0
+networkx==3.1
+omegaconf==2.3.0
+onnx==1.16.0
+onnxruntime-gpu==1.18.0; sys_platform == 'linux'
+onnxruntime==1.18.0; sys_platform == 'darwin' or sys_platform == 'win32'
+openai-whisper==20231117
+protobuf==4.25
+pyarrow==18.1.0
+pydantic==2.7.0
+pyworld==0.3.4
+rich==13.7.1
+soundfile==0.12.1
+tensorboard==2.14.0
+tensorrt-cu12==10.0.1; sys_platform == 'linux'
+tensorrt-cu12-bindings==10.0.1; sys_platform == 'linux'
+tensorrt-cu12-libs==10.0.1; sys_platform == 'linux'
+torch==2.3.1
+torchaudio==2.3.1
+transformers==4.40.1
+uvicorn==0.30.0
+wget==3.2
+fastapi==0.115.6
+fastapi-cli==0.0.4
+WeTextProcessing==1.0.3
+fastapi
+uvicorn[standard]

Dockerfile ADDED Viewed

	@@ -0,0 +1,34 @@

+# Base image (a PyTorch/CUDA base image is not required)
+FROM python:3.10-slim
+# Gradio binds to 127.0.0.1 unless told otherwise, which is unreachable from
+# outside the container. Listen on all interfaces on port 7860 instead
+# (Gradio's default port; presumably also the port the Space expects — confirm
+# against the Space's app_port setting).
+ENV GRADIO_SERVER_NAME=0.0.0.0 \
+    GRADIO_SERVER_PORT=7860
+# Install required system packages
+RUN apt-get update && \
+    apt-get install -y git unzip ffmpeg && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+# Create the working directory
+WORKDIR /app
+# Copy requirements.txt and install it; --no-cache-dir keeps pip's download
+# cache out of the image layers.
+# NOTE(review): this commit only touches model/requirements.txt — confirm that a
+# requirements.txt exists at the build-context root, otherwise this COPY fails.
+COPY requirements.txt .
+RUN pip install --no-cache-dir --upgrade pip
+RUN pip install --no-cache-dir -r requirements.txt
+# Copy the Matcha-TTS and cosyvoice code
+COPY . .
+# Download the model resources and unpack them
+RUN python -c "\
+from modelscope import snapshot_download;\
+snapshot_download('iic/CosyVoice2-0.5B', local_dir='pretrained_models/CosyVoice2-0.5B');\
+snapshot_download('iic/CosyVoice-ttsfrd', local_dir='pretrained_models/CosyVoice-ttsfrd')"
+RUN unzip -o pretrained_models/CosyVoice-ttsfrd/resource.zip -d pretrained_models/CosyVoice-ttsfrd || echo "resource.zip not found"
+# Install the ttsfrd wheels. Deliberately best effort: a failed install only
+# prints a message and the build continues.
+RUN pip install --no-cache-dir pretrained_models/CosyVoice-ttsfrd/ttsfrd_dependency-0.1-py3-none-any.whl || echo "dep whl missing"
+RUN pip install --no-cache-dir pretrained_models/CosyVoice-ttsfrd/ttsfrd-0.4.2-cp310-cp310-linux_x86_64.whl || echo "core whl missing"
+# Run the Gradio app (targeting Hugging Face Spaces)
+EXPOSE 7860
+CMD ["python", "app.py"]

app.py ADDED Viewed

	@@ -0,0 +1,55 @@

+# app.py
+import gradio as gr
+from cosyvoice.cli.cosyvoice import CosyVoice2
+from cosyvoice.utils.file_utils import load_wav
+import torchaudio
+import os
+# 모델 초기화
+cosyvoice = CosyVoice2(
+    'pretrained_models/CosyVoice2-0.5B',
+    load_jit=False,
+    load_trt=False,
+    fp16=False,
+    use_flow_cache=False
+)
+def infer(text, prompt_text, prompt_wav):
+    if prompt_wav is None:
+        return "프롬프트 음성을 업로드해주세요.", None
+    # 업로드된 파일 경로에서 16kHz로 로드
+    prompt_speech_16k = load_wav(prompt_wav, 16000)
+    # 음성 합성 실행
+    results = cosyvoice.inference_zero_shot(
+        text,
+        prompt_text=prompt_text,
+        prompt_speech_16k=prompt_speech_16k,
+        text_frontend=True
+    )
+    # 결과 저장
+    output_path = f'korean_tts_output.wav'
+    torchaudio.save(output_path, results[0]['tts_speech'], cosyvoice.sample_rate)
+    return "합성 완료!", output_path
+iface = gr.Interface(
+    fn=infer,
+    inputs=[
+        gr.Textbox(label="TTS할 텍스트", lines=2, placeholder="예: 공룡이 밤양갱을 몰래 먹고 도망쳤어요."),
+        gr.Textbox(label="프롬프트 문장 텍스트", lines=1, placeholder="예: 오느른 커피 안 마실 꺼야"),
+        gr.Audio(label="프롬프트 음성 (wav)", type="filepath")
+    ],
+    outputs=[
+        gr.Text(label="상태"),
+        gr.Audio(label="합성된 음성")
+    ],
+    title="CosyVoice2 음성 합성기",
+    description="직접 프롬프트 음성을 업로드하고 TTS 텍스트를 입력해보세요!"
+)
+if __name__ == "__main__":
+    iface.launch()

model/requirements.txt CHANGED Viewed

@@ -38,3 +38,5 @@ wget==3.2
 fastapi==0.115.6
 fastapi-cli==0.0.4
 WeTextProcessing==1.0.3

 fastapi==0.115.6
 fastapi-cli==0.0.4
 WeTextProcessing==1.0.3
+fastapi
+uvicorn[standard]