parkjihye committed on
Commit
98f15bb
·
1 Parent(s): 495bed3

Add Application file

Browse files
.history/Dockerfile_20250614014624 ADDED
File without changes
.history/Dockerfile_20250614014629 ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Base image: plain slim Python (a PyTorch/CUDA base image is not required).
FROM python:3.10-slim

# Gradio binds to 127.0.0.1 by default, which cannot be reached from outside
# the container. Listen on all interfaces on port 7860 instead.
# NOTE(review): 7860 is assumed to be the port the hosting platform expects.
ENV GRADIO_SERVER_NAME=0.0.0.0 \
    GRADIO_SERVER_PORT=7860

# Install required system packages.
RUN apt-get update && \
    apt-get install -y git unzip ffmpeg && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

# Create the working directory.
WORKDIR /app

# Copy requirements.txt and install the Python dependencies.
# --no-cache-dir keeps pip's download cache out of the image layers.
COPY requirements.txt .
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir -r requirements.txt

# Copy the Matcha-TTS and cosyvoice code.
# NOTE(review): upstream CosyVoice expects third_party/Matcha-TTS on the import
# path; confirm app.py (or PYTHONPATH) takes care of that.
COPY . .

# Download the model resources.
# The Python code starts right after the opening quote and continuation lines
# carry no indentation, so the joined one-liner cannot hit an IndentationError.
RUN python -c "from modelscope import snapshot_download; \
snapshot_download('iic/CosyVoice2-0.5B', local_dir='pretrained_models/CosyVoice2-0.5B'); \
snapshot_download('iic/CosyVoice-ttsfrd', local_dir='pretrained_models/CosyVoice-ttsfrd')"

# Unpack the ttsfrd resources. Best-effort: a missing archive only prints a
# message and does not fail the build.
RUN unzip -o pretrained_models/CosyVoice-ttsfrd/resource.zip -d pretrained_models/CosyVoice-ttsfrd || echo "resource.zip not found"

# Install the ttsfrd wheels. Best-effort as well: a missing or incompatible
# wheel is reported and the build continues.
RUN pip install --no-cache-dir pretrained_models/CosyVoice-ttsfrd/ttsfrd_dependency-0.1-py3-none-any.whl || echo "dep whl missing"
RUN pip install --no-cache-dir pretrained_models/CosyVoice-ttsfrd/ttsfrd-0.4.2-cp310-cp310-linux_x86_64.whl || echo "core whl missing"

# Run the Gradio app (Hugging Face Spaces layout).
EXPOSE 7860
CMD ["python", "app.py"]
.history/app_20250614014519.py ADDED
File without changes
.history/app_20250614014524.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py
2
+
3
+ import gradio as gr
4
+ from cosyvoice.cli.cosyvoice import CosyVoice2
5
+ from cosyvoice.utils.file_utils import load_wav
6
+ import torchaudio
7
+ import os
8
+
9
# Model initialization: build the shared CosyVoice2 instance once at import
# time. Every optional flag the constructor is given here is switched off.
_COSYVOICE_OPTIONS = {
    'load_jit': False,
    'load_trt': False,
    'fp16': False,
    'use_flow_cache': False,
}
cosyvoice = CosyVoice2('pretrained_models/CosyVoice2-0.5B', **_COSYVOICE_OPTIONS)
17
+
18
def infer(text, prompt_text, prompt_wav):
    """Synthesize ``text`` in the voice of the uploaded prompt recording.

    Args:
        text: Text to synthesize.
        prompt_text: Transcript of the prompt recording.
        prompt_wav: Filesystem path of the uploaded prompt audio, or ``None``
            when nothing was uploaded.

    Returns:
        A ``(status_message, wav_path)`` tuple. ``wav_path`` is ``None`` when
        no prompt audio was supplied or the model produced no audio.
    """
    if prompt_wav is None:
        return "ํ”„๋กฌํ”„ํŠธ ์Œ์„ฑ์„ ์—…๋กœ๋“œํ•ด์ฃผ์„ธ์š”.", None

    # Imported here so the early return above stays free of heavy imports.
    import tempfile

    import torch

    # Load the uploaded recording from its file path, resampled to 16 kHz.
    prompt_speech_16k = load_wav(prompt_wav, 16000)

    # Run the zero-shot speech synthesis.
    results = cosyvoice.inference_zero_shot(
        text,
        prompt_text=prompt_text,
        prompt_speech_16k=prompt_speech_16k,
        text_frontend=True,
    )

    # Upstream CosyVoice yields one result dict per synthesized segment, so
    # the return value cannot be indexed with [0]. Collect every segment; this
    # also works if a list is returned.
    segments = [chunk['tts_speech'] for chunk in results]
    if not segments:
        return "합성된 음성이 없습니다. 입력 텍스트를 확인해주세요.", None

    # Join the segments along the last axis.
    # NOTE(review): assumes that axis is time (tensors shaped (channels, samples)) - confirm.
    speech = torch.cat(segments, dim=-1)

    # Save the result to a unique file so concurrent requests never overwrite
    # each other's output (the previous fixed filename was shared).
    with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as handle:
        output_path = handle.name
    torchaudio.save(output_path, speech, cosyvoice.sample_rate)

    return "ํ•ฉ์„ฑ ์™„๋ฃŒ!", output_path
38
+
39
# Gradio UI: two text boxes and the prompt recording go in; a status message
# and the synthesized audio come out.
iface = gr.Interface(
    fn=infer,
    inputs=[
        gr.Textbox(label="TTSํ•  ํ…์ŠคํŠธ", lines=2, placeholder="์˜ˆ: ๊ณต๋ฃก์ด ๋ฐค์–‘๊ฐฑ์„ ๋ชฐ๋ž˜ ๋จน๊ณ  ๋„๋ง์ณค์–ด์š”."),
        gr.Textbox(label="ํ”„๋กฌํ”„ํŠธ ๋ฌธ์žฅ ํ…์ŠคํŠธ", lines=1, placeholder="์˜ˆ: ์˜ค๋А๋ฅธ ์ปคํ”ผ ์•ˆ ๋งˆ์‹ค ๊บผ์•ผ"),
        gr.Audio(label="ํ”„๋กฌํ”„ํŠธ ์Œ์„ฑ (wav)", type="filepath"),
    ],
    outputs=[
        gr.Text(label="์ƒํƒœ"),
        gr.Audio(label="ํ•ฉ์„ฑ๋œ ์Œ์„ฑ"),
    ],
    title="CosyVoice2 ์Œ์„ฑ ํ•ฉ์„ฑ๊ธฐ",
    description="์ง์ ‘ ํ”„๋กฌํ”„ํŠธ ์Œ์„ฑ์„ ์—…๋กœ๋“œํ•˜๊ณ  TTS ํ…์ŠคํŠธ๋ฅผ ์ž…๋ ฅํ•ด๋ณด์„ธ์š”!",
)

if __name__ == "__main__":
    # Gradio's default bind address is loopback, which is unreachable when the
    # app runs inside a container. Default to all interfaces on port 7860, and
    # let the standard Gradio environment variables override either value.
    iface.launch(
        server_name=os.environ.get("GRADIO_SERVER_NAME", "0.0.0.0"),
        server_port=int(os.environ.get("GRADIO_SERVER_PORT", "7860")),
    )
.history/model/requirements_20250504154654.txt ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ --extra-index-url https://download.pytorch.org/whl/cu121
2
+ --extra-index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/onnxruntime-cuda-12/pypi/simple/ # https://github.com/microsoft/onnxruntime/issues/21684
3
+ conformer==0.3.2
4
+ deepspeed==0.14.2; sys_platform == 'linux'
5
+ diffusers==0.29.0
6
+ gdown==5.1.0
7
+ gradio==5.4.0
8
+ grpcio==1.57.0
9
+ grpcio-tools==1.57.0
10
+ hydra-core==1.3.2
11
+ HyperPyYAML==1.2.2
12
+ inflect==7.3.1
13
+ librosa==0.10.2
14
+ lightning==2.2.4
15
+ matplotlib==3.7.5
16
+ modelscope==1.20.0
17
+ networkx==3.1
18
+ omegaconf==2.3.0
19
+ onnx==1.16.0
20
+ onnxruntime-gpu==1.18.0; sys_platform == 'linux'
21
+ onnxruntime==1.18.0; sys_platform == 'darwin' or sys_platform == 'win32'
22
+ openai-whisper==20231117
23
+ protobuf==4.25
24
+ pyarrow==18.1.0
25
+ pydantic==2.7.0
26
+ pyworld==0.3.4
27
+ rich==13.7.1
28
+ soundfile==0.12.1
29
+ tensorboard==2.14.0
30
+ tensorrt-cu12==10.0.1; sys_platform == 'linux'
31
+ tensorrt-cu12-bindings==10.0.1; sys_platform == 'linux'
32
+ tensorrt-cu12-libs==10.0.1; sys_platform == 'linux'
33
+ torch==2.3.1
34
+ torchaudio==2.3.1
35
+ transformers==4.40.1
36
+ uvicorn==0.30.0
37
+ wget==3.2
38
+ fastapi==0.115.6
39
+ fastapi-cli==0.0.4
40
+ WeTextProcessing==1.0.3
.history/model/requirements_20250614014735.txt ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ --extra-index-url https://download.pytorch.org/whl/cu121
2
+ --extra-index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/onnxruntime-cuda-12/pypi/simple/ # https://github.com/microsoft/onnxruntime/issues/21684
3
+ conformer==0.3.2
4
+ deepspeed==0.14.2; sys_platform == 'linux'
5
+ diffusers==0.29.0
6
+ gdown==5.1.0
7
+ gradio==5.4.0
8
+ grpcio==1.57.0
9
+ grpcio-tools==1.57.0
10
+ hydra-core==1.3.2
11
+ HyperPyYAML==1.2.2
12
+ inflect==7.3.1
13
+ librosa==0.10.2
14
+ lightning==2.2.4
15
+ matplotlib==3.7.5
16
+ modelscope==1.20.0
17
+ networkx==3.1
18
+ omegaconf==2.3.0
19
+ onnx==1.16.0
20
+ onnxruntime-gpu==1.18.0; sys_platform == 'linux'
21
+ onnxruntime==1.18.0; sys_platform == 'darwin' or sys_platform == 'win32'
22
+ openai-whisper==20231117
23
+ protobuf==4.25
24
+ pyarrow==18.1.0
25
+ pydantic==2.7.0
26
+ pyworld==0.3.4
27
+ rich==13.7.1
28
+ soundfile==0.12.1
29
+ tensorboard==2.14.0
30
+ tensorrt-cu12==10.0.1; sys_platform == 'linux'
31
+ tensorrt-cu12-bindings==10.0.1; sys_platform == 'linux'
32
+ tensorrt-cu12-libs==10.0.1; sys_platform == 'linux'
33
+ torch==2.3.1
34
+ torchaudio==2.3.1
35
+ transformers==4.40.1
36
+ uvicorn==0.30.0
37
+ wget==3.2
38
+ fastapi==0.115.6
39
+ fastapi-cli==0.0.4
40
+ WeTextProcessing==1.0.3
41
+ fastapi
42
+ uvicorn[standard]
Dockerfile ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Base image: plain slim Python (a PyTorch/CUDA base image is not required).
FROM python:3.10-slim

# Gradio binds to 127.0.0.1 by default, which cannot be reached from outside
# the container. Listen on all interfaces on port 7860 instead.
# NOTE(review): 7860 is assumed to be the port the hosting platform expects.
ENV GRADIO_SERVER_NAME=0.0.0.0 \
    GRADIO_SERVER_PORT=7860

# Install required system packages.
RUN apt-get update && \
    apt-get install -y git unzip ffmpeg && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

# Create the working directory.
WORKDIR /app

# Copy requirements.txt and install the Python dependencies.
# --no-cache-dir keeps pip's download cache out of the image layers.
COPY requirements.txt .
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir -r requirements.txt

# Copy the Matcha-TTS and cosyvoice code.
# NOTE(review): upstream CosyVoice expects third_party/Matcha-TTS on the import
# path; confirm app.py (or PYTHONPATH) takes care of that.
COPY . .

# Download the model resources.
# The Python code starts right after the opening quote and continuation lines
# carry no indentation, so the joined one-liner cannot hit an IndentationError.
RUN python -c "from modelscope import snapshot_download; \
snapshot_download('iic/CosyVoice2-0.5B', local_dir='pretrained_models/CosyVoice2-0.5B'); \
snapshot_download('iic/CosyVoice-ttsfrd', local_dir='pretrained_models/CosyVoice-ttsfrd')"

# Unpack the ttsfrd resources. Best-effort: a missing archive only prints a
# message and does not fail the build.
RUN unzip -o pretrained_models/CosyVoice-ttsfrd/resource.zip -d pretrained_models/CosyVoice-ttsfrd || echo "resource.zip not found"

# Install the ttsfrd wheels. Best-effort as well: a missing or incompatible
# wheel is reported and the build continues.
RUN pip install --no-cache-dir pretrained_models/CosyVoice-ttsfrd/ttsfrd_dependency-0.1-py3-none-any.whl || echo "dep whl missing"
RUN pip install --no-cache-dir pretrained_models/CosyVoice-ttsfrd/ttsfrd-0.4.2-cp310-cp310-linux_x86_64.whl || echo "core whl missing"

# Run the Gradio app (Hugging Face Spaces layout).
EXPOSE 7860
CMD ["python", "app.py"]
app.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py
2
+
3
+ import gradio as gr
4
+ from cosyvoice.cli.cosyvoice import CosyVoice2
5
+ from cosyvoice.utils.file_utils import load_wav
6
+ import torchaudio
7
+ import os
8
+
9
# Model initialization: build the shared CosyVoice2 instance once at import
# time. Every optional flag the constructor is given here is switched off.
_COSYVOICE_OPTIONS = {
    'load_jit': False,
    'load_trt': False,
    'fp16': False,
    'use_flow_cache': False,
}
cosyvoice = CosyVoice2('pretrained_models/CosyVoice2-0.5B', **_COSYVOICE_OPTIONS)
17
+
18
def infer(text, prompt_text, prompt_wav):
    """Synthesize ``text`` in the voice of the uploaded prompt recording.

    Args:
        text: Text to synthesize.
        prompt_text: Transcript of the prompt recording.
        prompt_wav: Filesystem path of the uploaded prompt audio, or ``None``
            when nothing was uploaded.

    Returns:
        A ``(status_message, wav_path)`` tuple. ``wav_path`` is ``None`` when
        no prompt audio was supplied or the model produced no audio.
    """
    if prompt_wav is None:
        return "ํ”„๋กฌํ”„ํŠธ ์Œ์„ฑ์„ ์—…๋กœ๋“œํ•ด์ฃผ์„ธ์š”.", None

    # Imported here so the early return above stays free of heavy imports.
    import tempfile

    import torch

    # Load the uploaded recording from its file path, resampled to 16 kHz.
    prompt_speech_16k = load_wav(prompt_wav, 16000)

    # Run the zero-shot speech synthesis.
    results = cosyvoice.inference_zero_shot(
        text,
        prompt_text=prompt_text,
        prompt_speech_16k=prompt_speech_16k,
        text_frontend=True,
    )

    # Upstream CosyVoice yields one result dict per synthesized segment, so
    # the return value cannot be indexed with [0]. Collect every segment; this
    # also works if a list is returned.
    segments = [chunk['tts_speech'] for chunk in results]
    if not segments:
        return "합성된 음성이 없습니다. 입력 텍스트를 확인해주세요.", None

    # Join the segments along the last axis.
    # NOTE(review): assumes that axis is time (tensors shaped (channels, samples)) - confirm.
    speech = torch.cat(segments, dim=-1)

    # Save the result to a unique file so concurrent requests never overwrite
    # each other's output (the previous fixed filename was shared).
    with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as handle:
        output_path = handle.name
    torchaudio.save(output_path, speech, cosyvoice.sample_rate)

    return "ํ•ฉ์„ฑ ์™„๋ฃŒ!", output_path
38
+
39
# Gradio UI: two text boxes and the prompt recording go in; a status message
# and the synthesized audio come out.
iface = gr.Interface(
    fn=infer,
    inputs=[
        gr.Textbox(label="TTSํ•  ํ…์ŠคํŠธ", lines=2, placeholder="์˜ˆ: ๊ณต๋ฃก์ด ๋ฐค์–‘๊ฐฑ์„ ๋ชฐ๋ž˜ ๋จน๊ณ  ๋„๋ง์ณค์–ด์š”."),
        gr.Textbox(label="ํ”„๋กฌํ”„ํŠธ ๋ฌธ์žฅ ํ…์ŠคํŠธ", lines=1, placeholder="์˜ˆ: ์˜ค๋А๋ฅธ ์ปคํ”ผ ์•ˆ ๋งˆ์‹ค ๊บผ์•ผ"),
        gr.Audio(label="ํ”„๋กฌํ”„ํŠธ ์Œ์„ฑ (wav)", type="filepath"),
    ],
    outputs=[
        gr.Text(label="์ƒํƒœ"),
        gr.Audio(label="ํ•ฉ์„ฑ๋œ ์Œ์„ฑ"),
    ],
    title="CosyVoice2 ์Œ์„ฑ ํ•ฉ์„ฑ๊ธฐ",
    description="์ง์ ‘ ํ”„๋กฌํ”„ํŠธ ์Œ์„ฑ์„ ์—…๋กœ๋“œํ•˜๊ณ  TTS ํ…์ŠคํŠธ๋ฅผ ์ž…๋ ฅํ•ด๋ณด์„ธ์š”!",
)

if __name__ == "__main__":
    # Gradio's default bind address is loopback, which is unreachable when the
    # app runs inside a container. Default to all interfaces on port 7860, and
    # let the standard Gradio environment variables override either value.
    iface.launch(
        server_name=os.environ.get("GRADIO_SERVER_NAME", "0.0.0.0"),
        server_port=int(os.environ.get("GRADIO_SERVER_PORT", "7860")),
    )
model/requirements.txt CHANGED
@@ -38,3 +38,5 @@ wget==3.2
38
  fastapi==0.115.6
39
  fastapi-cli==0.0.4
40
  WeTextProcessing==1.0.3
 
 
 
38
  fastapi==0.115.6
39
  fastapi-cli==0.0.4
40
  WeTextProcessing==1.0.3
41
+ fastapi
42
+ uvicorn[standard]