# Author: parkjihye
# Commit message: Update app.py
# Revision: 5bd0edb (verified)
# app.py
import html
import os
import subprocess
import sys
import tempfile
import threading
import traceback
import uuid
from typing import Optional

import torchaudio
from fastapi import FastAPI, Request, File, UploadFile, Form
from fastapi.responses import HTMLResponse, FileResponse
from modelscope import snapshot_download
from pydantic import BaseModel
# Create the global lock object.
# NOTE(review): nothing in the visible part of this file acquires tts_lock —
# confirm whether inference calls were meant to be serialized with it.
tts_lock = threading.Lock()
# ---------------- CosyVoice path setup ----------------
# These directories are appended to sys.path before the cosyvoice imports
# below; presumably that is what makes those imports resolvable.
sys.path.append('/app/model')
sys.path.append('/app/model/third_party/Matcha-TTS')
from cosyvoice.cli.cosyvoice import CosyVoice2
from cosyvoice.utils.file_utils import load_wav
# ---------------- Global variables ----------------
# Holds the loaded CosyVoice2 instance; stays None until
# initialize_cosyvoice() assigns it.
cosyvoice_model = None
# ---------------- Model initialization function ----------------
def initialize_cosyvoice():
    """Load the CosyVoice2 model into the module-level ``cosyvoice_model``.

    The process working directory is switched to the cosyvoice package
    directory while the model is loaded and is restored on every exit path
    (success, missing-path early return, or exception).

    Returns:
        bool: True when the model was loaded; False when a required path is
        missing or loading raised (the error and traceback are printed).
    """
    global cosyvoice_model
    # Sentinel: only restore the working directory if we managed to read it.
    original_cwd = None
    try:
        print("=== CosyVoice2 ๋ชจ๋ธ ์ดˆ๊ธฐํ™” ์‹œ์ž‘ ===")
        # Change the working directory to the cosyvoice module location
        original_cwd = os.getcwd()
        cosyvoice_dir = '/app/model/cosyvoice'
        print(f"์ž‘์—… ๋””๋ ‰ํ† ๋ฆฌ ๋ณ€๊ฒฝ: {original_cwd} -> {cosyvoice_dir}")
        os.chdir(cosyvoice_dir)
        # Check the model paths
        model_path = '/app/pretrained_models/CosyVoice2-0.5B'
        ttsfrd_path = '/app/pretrained_models/CosyVoice-ttsfrd'
        resource_path = '/app/pretrained_models/CosyVoice-ttsfrd/resource'
        print(f"๋ชจ๋ธ ๊ฒฝ๋กœ ํ™•์ธ: {model_path}")
        print(f"๋ชจ๋ธ ๊ฒฝ๋กœ ์กด์žฌ: {os.path.exists(model_path)}")
        print(f"ttsfrd ๊ฒฝ๋กœ ํ™•์ธ: {ttsfrd_path}")
        print(f"ttsfrd ๊ฒฝ๋กœ ์กด์žฌ: {os.path.exists(ttsfrd_path)}")
        print(f"๋ฆฌ์†Œ์Šค ๊ฒฝ๋กœ ํ™•์ธ: {resource_path}")
        print(f"๋ฆฌ์†Œ์Šค ๊ฒฝ๋กœ ์กด์žฌ: {os.path.exists(resource_path)}")
        # Diagnostic listings of the ttsfrd and resource directories.
        if os.path.exists(ttsfrd_path):
            print("ttsfrd ๋””๋ ‰ํ† ๋ฆฌ ๋‚ด์šฉ:")
            for item in os.listdir(ttsfrd_path):
                item_path = os.path.join(ttsfrd_path, item)
                print(f" {item} ({'dir' if os.path.isdir(item_path) else 'file'})")
        if os.path.exists(resource_path):
            print("resource ๋””๋ ‰ํ† ๋ฆฌ ๋‚ด์šฉ:")
            for item in os.listdir(resource_path):
                print(f" {item}")
        # Both the model and the resource directory are required.
        if not os.path.exists(model_path):
            print(f"โŒ ๋ชจ๋ธ ๊ฒฝ๋กœ๊ฐ€ ์กด์žฌํ•˜์ง€ ์•Š์Šต๋‹ˆ๋‹ค: {model_path}")
            return False
        if not os.path.exists(resource_path):
            print(f"โŒ ๋ฆฌ์†Œ์Šค ๊ฒฝ๋กœ๊ฐ€ ์กด์žฌํ•˜์ง€ ์•Š์Šต๋‹ˆ๋‹ค: {resource_path}")
            return False
        # Check the resource path relative to the current (cosyvoice) directory
        expected_resource_path = os.path.join(os.getcwd(), '../../pretrained_models/CosyVoice-ttsfrd/resource')
        normalized_path = os.path.normpath(expected_resource_path)
        print(f"CosyVoice๊ฐ€ ์ฐพ๋Š” ๋ฆฌ์†Œ์Šค ๊ฒฝ๋กœ: {normalized_path}")
        print(f"ํ•ด๋‹น ๊ฒฝ๋กœ ์กด์žฌ ์—ฌ๋ถ€: {os.path.exists(normalized_path)}")
        # Load the model
        print("CosyVoice2 ๋ชจ๋ธ ๋กœ๋“œ ์ค‘...")
        cosyvoice_model = CosyVoice2(
            model_path,
            load_jit=False,
            load_trt=False,
            fp16=False,
        )
        print("โœ… CosyVoice2 ๋ชจ๋ธ ์ดˆ๊ธฐํ™” ์™„๋ฃŒ!")
        return True
    except Exception as e:
        print(f"โŒ ๋ชจ๋ธ ์ดˆ๊ธฐํ™” ์‹คํŒจ: {str(e)}")
        traceback.print_exc()
        return False
    finally:
        # Restore the working directory on every exit path, including the
        # early "path missing" returns above.
        if original_cwd is not None:
            try:
                os.chdir(original_cwd)
            except OSError as restore_error:
                # Best effort: report instead of silently swallowing.
                print(f"Failed to restore working directory to {original_cwd}: {restore_error}")
# ---------------- Model initialization at server startup ----------------
from contextlib import asynccontextmanager
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Initialize the model when the server starts."""
    print("๐Ÿš€ ์„œ๋ฒ„ ์‹œ์ž‘ - ๋ชจ๋ธ ์ดˆ๊ธฐํ™” ์ค‘...")
    # The boolean result is not checked here; a failed load leaves
    # cosyvoice_model as None, which the endpoints test for.
    initialize_cosyvoice()
    # Nothing follows the yield, so no shutdown work is performed.
    yield
# Apply the lifespan handler to the FastAPI app
app = FastAPI(
    title="CosyVoice2 Korean TTS API",
    description="FastAPI + CosyVoice2 ๊ธฐ๋ฐ˜ ํ•œ๊ตญ์–ด ์Œ์„ฑ ํ•ฉ์„ฑ ์„œ๋ฒ„",
    version="1.0.0",
    lifespan=lifespan
)
# ---------------- Input/output models ----------------
class TTSRequest(BaseModel):
    """Request payload for the /synthesize endpoint."""
    # Text to synthesize.
    text: str
    # Transcript of the uploaded prompt audio.
    prompt_text: str
class TTSResponse(BaseModel):
    """Result envelope returned by the /synthesize endpoint."""
    # "success" or "error".
    status: str
    # Human-readable outcome description.
    message: str
    # Relative path of the generated file; omitted (None) on error responses,
    # so the field is declared Optional to match its default.
    audio_path: Optional[str] = None
# ---------------- API: JSON POST ----------------
@app.post("/synthesize", response_model=TTSResponse)
async def synthesize_speech(request: TTSRequest, prompt_audio: UploadFile = File(...)):
    """
    Speech synthesis API.

    - text: text to synthesize
    - prompt_text: transcript of the prompt audio
    - prompt_audio: prompt audio file (wav, mp3, flac, etc.)

    Returns a TTSResponse. Failures are reported as status="error" with a
    message instead of being raised; on success ``audio_path`` is the
    relative path ``outputs/<file>.wav`` of the generated audio.
    """
    # NOTE(review): FastAPI documents that a JSON body model cannot be
    # combined with File()/Form() parameters in one operation — confirm how
    # clients are expected to send `request` to this endpoint.
    if cosyvoice_model is None:
        return TTSResponse(
            status="error",
            message="๋ชจ๋ธ์ด ์ดˆ๊ธฐํ™”๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค. ์„œ๋ฒ„ ๋กœ๊ทธ๋ฅผ ํ™•์ธํ•ด์ฃผ์„ธ์š”."
        )
    temp_path = None
    try:
        # Save the prompt audio to a temporary file (keeping the extension).
        # The upload may arrive without a filename, hence the `or ''`.
        temp_file_extension = os.path.splitext(prompt_audio.filename or '')[1].lower()
        if not temp_file_extension:
            temp_file_extension = '.wav'  # default
        with tempfile.NamedTemporaryFile(delete=False, suffix=temp_file_extension) as temp_file:
            # Record the path first so the finally-block can clean up even if
            # reading or writing the upload fails.
            temp_path = temp_file.name
            temp_file.write(await prompt_audio.read())
        # Load the prompt audio (16 kHz)
        try:
            prompt_speech_16k = load_wav(temp_path, 16000)
        except Exception as e:
            print(f"load_wav ์‹คํŒจ: {e}")
            # fallback: use librosa directly
            import librosa
            import torch
            audio_data, sr = librosa.load(temp_path, sr=16000)
            prompt_speech_16k = torch.from_numpy(audio_data).unsqueeze(0)
        # Run speech synthesis
        results_generator = cosyvoice_model.inference_zero_shot(
            request.text,
            prompt_text=request.prompt_text,
            prompt_speech_16k=prompt_speech_16k,
            text_frontend=True
        )
        # Materialize the generator into a list
        results = list(results_generator)
        if not results:
            return TTSResponse(
                status="error",
                message="์Œ์„ฑ ํ•ฉ์„ฑ ๊ฒฐ๊ณผ๊ฐ€ ๋น„์–ด์žˆ์Šต๋‹ˆ๋‹ค."
            )
        # Save the result (in the designated output directory)
        output_dir = '/app/outputs'
        os.makedirs(output_dir, exist_ok=True)
        # A random name avoids two requests with the same text (but different
        # prompt voices) overwriting each other's output.
        output_filename = f'output_{uuid.uuid4().hex}.wav'
        output_path = os.path.join(output_dir, output_filename)
        # The generator may yield several segments; join them along the time
        # axis so the saved file contains the whole utterance.
        import torch
        full_speech = torch.cat([segment['tts_speech'] for segment in results], dim=-1)
        torchaudio.save(output_path, full_speech, cosyvoice_model.sample_rate)
        return TTSResponse(
            status="success",
            message="์Œ์„ฑ ํ•ฉ์„ฑ์ด ์™„๋ฃŒ๋˜์—ˆ์Šต๋‹ˆ๋‹ค.",
            audio_path=f'outputs/{output_filename}'
        )
    except Exception as e:
        return TTSResponse(
            status="error",
            message=f"์Œ์„ฑ ํ•ฉ์„ฑ ์ค‘ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค: {str(e)}"
        )
    finally:
        # Clean up the temporary file on every path, not only on success.
        if temp_path is not None:
            try:
                os.unlink(temp_path)
            except OSError as cleanup_error:
                print(f"Failed to remove temporary file {temp_path}: {cleanup_error}")
# ---------------- Audio file download ----------------
@app.get("/download/{filepath:path}")
async def download_audio(filepath: str):
    """Download a synthesized audio file.

    ``filepath`` is interpreted relative to ``/app`` (the links produced by
    this server have the form ``outputs/<name>.wav``). Only regular files
    that resolve inside ``/app/outputs`` are served; anything else —
    including ``..`` traversal, absolute paths and symlinks pointing
    elsewhere — yields the same "not found" payload as a missing file.

    Returns:
        FileResponse with media type audio/wav, or a dict with an "error" key.
    """
    not_found = {"error": "ํŒŒ์ผ์„ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค."}
    allowed_root = os.path.realpath('/app/outputs')
    try:
        # realpath collapses "..", resolves symlinks, and lets an absolute
        # `filepath` replace the '/app' prefix — all of which the containment
        # check below then rejects.
        full_path = os.path.realpath(os.path.join('/app', filepath))
    except ValueError:
        # e.g. an embedded NUL byte in the requested path
        return not_found
    is_inside_outputs = full_path.startswith(allowed_root + os.sep)
    if not is_inside_outputs or not os.path.isfile(full_path):
        return not_found
    filename = os.path.basename(full_path)
    return FileResponse(full_path, media_type="audio/wav", filename=filename)
# ---------------- HTML UI ----------------
@app.get("/", response_class=HTMLResponse)
async def main_ui():
    """Serve the landing page.

    The page is a static HTML document containing a multipart form that
    posts ``prompt_audio``, ``prompt_text`` and ``text`` to ``/submit``.
    """
    return """
<html>
<head>
<title>CosyVoice2 Korean TTS</title>
<meta charset="UTF-8">
<style>
body {
font-family: 'Segoe UI', Arial, sans-serif;
max-width: 900px;
margin: auto;
padding: 2rem;
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
min-height: 100vh;
}
.container {
background-color: white;
padding: 2.5rem;
border-radius: 15px;
box-shadow: 0 10px 30px rgba(0,0,0,0.2);
}
.header {
text-align: center;
margin-bottom: 2rem;
}
.header h1 {
color: #333;
margin-bottom: 0.5rem;
}
.header p {
color: #666;
font-size: 1.1rem;
}
.form-group {
margin-bottom: 1.5rem;
}
label {
display: block;
margin-bottom: 0.5rem;
font-weight: 600;
color: #333;
}
input[type="text"], textarea {
width: 100%;
padding: 0.75rem;
border: 2px solid #e0e0e0;
border-radius: 8px;
font-size: 1rem;
box-sizing: border-box;
transition: border-color 0.3s;
}
input[type="text"]:focus, textarea:focus {
outline: none;
border-color: #667eea;
}
input[type="file"] {
width: 100%;
padding: 0.75rem;
border: 2px dashed #ccc;
border-radius: 8px;
background-color: #f9f9f9;
box-sizing: border-box;
transition: all 0.3s;
}
input[type="file"]:hover {
border-color: #667eea;
background-color: #f0f4ff;
}
input[type="submit"] {
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
color: white;
padding: 1rem 2rem;
border: none;
border-radius: 8px;
cursor: pointer;
font-size: 1.1rem;
font-weight: 600;
transition: transform 0.2s;
width: 100%;
}
input[type="submit"]:hover {
transform: translateY(-2px);
}
.info {
background: linear-gradient(135deg, #e3f2fd 0%, #f3e5f5 100%);
padding: 1.5rem;
border-radius: 10px;
margin-bottom: 2rem;
border-left: 4px solid #667eea;
}
.example {
background-color: #f8f9fa;
padding: 1rem;
border-radius: 8px;
margin-top: 0.5rem;
border-left: 3px solid #28a745;
}
.example strong {
color: #28a745;
}
</style>
</head>
<body>
<div class="container">
<div class="header">
<h1>๐ŸŽค CosyVoice2 ์Œ์„ฑ ํ•ฉ์„ฑ๊ธฐ</h1>
<p>ํ•œ๊ตญ์–ด ํ…์ŠคํŠธ๋ฅผ ์ž์—ฐ์Šค๋Ÿฌ์šด ์Œ์„ฑ์œผ๋กœ ๋ณ€ํ™˜ํ•ด๋ณด์„ธ์š”!</p>
</div>
<div class="info">
<strong>๐Ÿ“‹ ์‚ฌ์šฉ ๋ฐฉ๋ฒ•:</strong><br>
1. <strong>ํ”„๋กฌํ”„ํŠธ ์Œ์„ฑ:</strong> ๋ชฉ์†Œ๋ฆฌ ์Šคํƒ€์ผ์˜ ๊ธฐ์ค€์ด ๋  ์Œ์„ฑ ํŒŒ์ผ์„ ์—…๋กœ๋“œํ•˜์„ธ์š”<br>
2. <strong>ํ”„๋กฌํ”„ํŠธ ํ…์ŠคํŠธ:</strong> ์—…๋กœ๋“œํ•œ ์Œ์„ฑ์˜ ์‹ค์ œ ๋‚ด์šฉ์„ ์ž…๋ ฅํ•˜์„ธ์š”<br>
3. <strong>ํ•ฉ์„ฑํ•  ํ…์ŠคํŠธ:</strong> ์ƒˆ๋กœ ์ƒ์„ฑํ•˜๊ณ  ์‹ถ์€ ์Œ์„ฑ์˜ ํ…์ŠคํŠธ๋ฅผ ์ž…๋ ฅํ•˜์„ธ์š”<br><br>
<strong>์ง€์› ํ˜•์‹:</strong> WAV
</div>
<form action="/submit" method="post" enctype="multipart/form-data">
<div class="form-group">
<label for="prompt_audio">๐ŸŽต ํ”„๋กฌํ”„ํŠธ ์Œ์„ฑ ํŒŒ์ผ:</label>
<input type="file" id="prompt_audio" name="prompt_audio" accept=".wav" required>
<div class="example">
<strong>์˜ˆ์‹œ:</strong> "์•ˆ๋…•ํ•˜์„ธ์š”"๋ผ๊ณ  ๋งํ•˜๋Š” ์Œ์„ฑ ํŒŒ์ผ
</div>
</div>
<div class="form-group">
<label for="prompt_text">๐Ÿ“ ํ”„๋กฌํ”„ํŠธ ํ…์ŠคํŠธ:</label>
<input type="text" id="prompt_text" name="prompt_text"
placeholder="์—…๋กœ๋“œํ•œ ์Œ์„ฑ์˜ ์‹ค์ œ ๋‚ด์šฉ"
value="์•ˆ๋…•ํ•˜์„ธ์š”" required>
<div class="example">
<strong>์˜ˆ์‹œ:</strong> ์•ˆ๋…•ํ•˜์„ธ์š” (์—…๋กœ๋“œํ•œ ์Œ์„ฑ ํŒŒ์ผ์˜ ์‹ค์ œ ๋‚ด์šฉ)
</div>
</div>
<div class="form-group">
<label for="text">๐ŸŽฏ ํ•ฉ์„ฑํ•  ํ…์ŠคํŠธ:</label>
<textarea id="text" name="text" rows="3"
placeholder="์ƒˆ๋กœ ์ƒ์„ฑํ•˜๊ณ  ์‹ถ์€ ์Œ์„ฑ์˜ ํ…์ŠคํŠธ๋ฅผ ์ž…๋ ฅํ•˜์„ธ์š”"
required>๊ณต๋ฃก์ด ๋ฐค์–‘๊ฐฑ์„ ๋ชฐ๋ž˜ ๋จน๊ณ  ๋„๋ง์ณค์–ด์š”.</textarea>
<div class="example">
<strong>์˜ˆ์‹œ:</strong> ๊ณต๋ฃก์ด ๋ฐค์–‘๊ฐฑ์„ ๋ชฐ๋ž˜ ๋จน๊ณ  ๋„๋ง์ณค์–ด์š”.
</div>
</div>
<input type="submit" value="๐Ÿš€ ์Œ์„ฑ ํ•ฉ์„ฑ ์‹œ์ž‘">
</form>
</div>
</body>
</html>
"""
# ---------------- Result rendering ----------------
@app.post("/submit", response_class=HTMLResponse)
async def handle_form(
    request: Request,
    text: str = Form(...),
    prompt_text: str = Form(...),
    prompt_audio: UploadFile = File(...)
):
    """Handle the HTML form: synthesize speech and render a result page.

    Args:
        request: Incoming request (not used by the handler body).
        text: Text to synthesize.
        prompt_text: Transcript of the uploaded prompt audio.
        prompt_audio: Uploaded prompt audio; only ``.wav`` names are accepted.

    Returns:
        An HTML page: the success page with an audio player and download
        link, or an error page (model not loaded, wrong file type, or any
        exception raised during synthesis).

    All user-controlled values and exception text are HTML-escaped before
    being placed in the page, and the temporary upload copy is removed on
    every exit path.
    """
    temp_path = None
    try:
        if cosyvoice_model is None:
            return """
<html>
<head><title>์—๋Ÿฌ</title><meta charset="UTF-8"></head>
<body style="font-family: Arial, sans-serif; max-width: 600px; margin: auto; padding: 2rem;">
<h1>โŒ ๋ชจ๋ธ ์ดˆ๊ธฐํ™” ์˜ค๋ฅ˜</h1>
<p>CosyVoice2 ๋ชจ๋ธ์ด ์•„์ง ์ดˆ๊ธฐํ™”๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค.</p>
<p>์„œ๋ฒ„ ๋กœ๊ทธ๋ฅผ ํ™•์ธํ•˜๊ณ  ์ž ์‹œ ํ›„ ๋‹ค์‹œ ์‹œ๋„ํ•ด์ฃผ์„ธ์š”.</p>
<br>
<a href="/" style="color: #667eea; text-decoration: none;">โ† ๋Œ์•„๊ฐ€๊ธฐ</a>
</body>
</html>
"""
        # Validate the file format. The upload may arrive without a
        # filename, which is treated as "not a WAV file".
        upload_name = prompt_audio.filename or ''
        if not upload_name.lower().endswith('.wav'):
            return """
<html>
<head><title>์—๋Ÿฌ</title><meta charset="UTF-8"></head>
<body style="font-family: Arial, sans-serif; max-width: 600px; margin: auto; padding: 2rem;">
<h1>โŒ ํŒŒ์ผ ํ˜•์‹ ์˜ค๋ฅ˜</h1>
<p>WAV ํŒŒ์ผ๋งŒ ์ง€์›๋ฉ๋‹ˆ๋‹ค.</p>
<p><strong>์ง€์› ํ˜•์‹:</strong> WAV</p>
<br>
<a href="/" style="color: #667eea; text-decoration: none;">โ† ๋Œ์•„๊ฐ€๊ธฐ</a>
</body>
</html>
"""
        # Save the prompt audio to a temporary file
        temp_file_extension = os.path.splitext(upload_name)[1].lower()
        if not temp_file_extension:
            temp_file_extension = '.wav'  # default
        with tempfile.NamedTemporaryFile(delete=False, suffix=temp_file_extension) as temp_file:
            # Record the path first so the finally-block can clean up even if
            # reading or writing the upload fails.
            temp_path = temp_file.name
            temp_file.write(await prompt_audio.read())
        print(f"์—…๋กœ๋“œ๋œ ํŒŒ์ผ: {prompt_audio.filename}")
        print(f"์ž„์‹œ ํŒŒ์ผ ๊ฒฝ๋กœ: {temp_path}")
        print(f"ํŒŒ์ผ ํฌ๊ธฐ: {os.path.getsize(temp_path)} bytes")
        # Load the prompt audio (16 kHz), with a fallback loader
        try:
            prompt_speech_16k = load_wav(temp_path, 16000)
            print(f"์˜ค๋””์˜ค ๋กœ๋“œ ์„ฑ๊ณต: shape={prompt_speech_16k.shape}")
        except Exception as e:
            print(f"load_wav ์‹คํŒจ: {e}")
            # fallback: use librosa directly
            import librosa
            import torch
            audio_data, sr = librosa.load(temp_path, sr=16000)
            prompt_speech_16k = torch.from_numpy(audio_data).unsqueeze(0)
            print(f"librosa fallback ์„ฑ๊ณต: shape={prompt_speech_16k.shape}")
        # Run speech synthesis
        print(f"์Œ์„ฑ ํ•ฉ์„ฑ ์‹œ์ž‘: text='{text}', prompt_text='{prompt_text}'")
        results_generator = cosyvoice_model.inference_zero_shot(
            text,
            prompt_text=prompt_text,
            prompt_speech_16k=prompt_speech_16k,
            text_frontend=True
        )
        # Materialize the generator into a list
        results = list(results_generator)
        print(f"์Œ์„ฑ ํ•ฉ์„ฑ ์™„๋ฃŒ! ๊ฒฐ๊ณผ ๊ฐœ์ˆ˜: {len(results)}")
        if not results:
            raise Exception("์Œ์„ฑ ํ•ฉ์„ฑ ๊ฒฐ๊ณผ๊ฐ€ ๋น„์–ด์žˆ์Šต๋‹ˆ๋‹ค.")
        # Save the result (in the designated output directory)
        output_dir = '/app/outputs'
        os.makedirs(output_dir, exist_ok=True)
        # A random name avoids two requests with the same text (but different
        # prompt voices) overwriting each other's output.
        output_filename = f'korean_tts_output_{uuid.uuid4().hex}.wav'
        output_path = os.path.join(output_dir, output_filename)
        # The generator may yield several segments; join them along the time
        # axis so the saved file contains the whole utterance.
        import torch
        full_speech = torch.cat([segment['tts_speech'] for segment in results], dim=-1)
        torchaudio.save(output_path, full_speech, cosyvoice_model.sample_rate)
        print(f"์˜ค๋””์˜ค ํŒŒ์ผ ์ €์žฅ ์™„๋ฃŒ: {output_path}")
        # Relative path used by the download links
        download_filename = f'outputs/{output_filename}'
    except Exception as e:
        # Exception text can echo user input, so escape it before embedding.
        # NOTE(review): the full traceback is shown to the client; consider
        # logging it server-side only.
        error_message = html.escape(str(e))
        error_details = html.escape(traceback.format_exc())
        return f"""
<html>
<head><title>์—๋Ÿฌ</title><meta charset="UTF-8"></head>
<body style="font-family: Arial, sans-serif; max-width: 700px; margin: auto; padding: 2rem;">
<h1>โŒ ์„œ๋ฒ„ ์˜ค๋ฅ˜ ๋ฐœ์ƒ</h1>
<p><strong>์˜ค๋ฅ˜ ๋ฉ”์‹œ์ง€:</strong></p>
<pre style="background-color: #f8f9fa; padding: 1rem; border-radius: 5px; overflow-x: auto;">{error_message}</pre>
<hr>
<details>
<summary><strong>์—๋Ÿฌ ์ƒ์„ธ (ํด๋ฆญํ•˜์—ฌ ํŽผ์น˜๊ธฐ)</strong></summary>
<pre style="background-color: #f8f9fa; padding: 1rem; border-radius: 5px; overflow-x: auto;">{error_details}</pre>
</details>
<br>
<a href="/" style="color: #667eea; text-decoration: none;">โ† ๋Œ์•„๊ฐ€๊ธฐ</a>
</body>
</html>
"""
    finally:
        # Clean up the temporary file on every path, not only on success.
        if temp_path is not None:
            try:
                os.unlink(temp_path)
            except OSError as cleanup_error:
                print(f"Failed to remove temporary file {temp_path}: {cleanup_error}")
    # Escape everything that originated from the client before rendering.
    safe_upload_name = html.escape(upload_name)
    safe_prompt_text = html.escape(prompt_text)
    safe_text = html.escape(text)
    return f"""
<html>
<head><title>ํ•ฉ์„ฑ ๊ฒฐ๊ณผ</title><meta charset="UTF-8"></head>
<body style="font-family: Arial, sans-serif; max-width: 700px; margin: auto; padding: 2rem;">
<h1>โœ… ์Œ์„ฑ ํ•ฉ์„ฑ ์™„๋ฃŒ!</h1>
<div style="background: linear-gradient(135deg, #e3f2fd 0%, #f3e5f5 100%); padding: 1.5rem; border-radius: 10px; margin: 1.5rem 0;">
<h3>๐Ÿ“‹ ์ž…๋ ฅ ์ •๋ณด</h3>
<p><strong>ํ”„๋กฌํ”„ํŠธ ์Œ์„ฑ:</strong> {safe_upload_name}</p>
<p><strong>ํ”„๋กฌํ”„ํŠธ ํ…์ŠคํŠธ:</strong> {safe_prompt_text}</p>
<p><strong>ํ•ฉ์„ฑํ•  ํ…์ŠคํŠธ:</strong> {safe_text}</p>
</div>
<div style="background-color: #f8f9fa; padding: 1.5rem; border-radius: 10px; border-left: 4px solid #28a745;">
<h3>๐ŸŽต ํ•ฉ์„ฑ๋œ ์Œ์„ฑ</h3>
<audio controls style="width: 100%; margin: 1rem 0;">
<source src="/download/{download_filename}" type="audio/wav">
๋ธŒ๋ผ์šฐ์ €๊ฐ€ ์˜ค๋””์˜ค๋ฅผ ์ง€์›ํ•˜์ง€ ์•Š์Šต๋‹ˆ๋‹ค.
</audio>
<br>
<a href="/download/{download_filename}"
style="background: linear-gradient(135deg, #28a745 0%, #20c997 100%);
color: white; padding: 0.75rem 1.5rem; text-decoration: none;
border-radius: 8px; display: inline-block; margin-top: 1rem;">
๐Ÿ“ฅ ํŒŒ์ผ ๋‹ค์šด๋กœ๋“œ
</a>
</div>
<br>
<a href="/" style="color: #667eea; text-decoration: none; font-size: 1.1rem;">โ† ๋‹ค์‹œ ์‹œ๋„ํ•˜๊ธฐ</a>
</body>
</html>
"""
# ---------------- Health check ----------------
@app.get("/health")
async def health_check():
    """Report whether the CosyVoice2 model has been loaded.

    Returns a dict with ``status`` ("ok" once the model is loaded, otherwise
    "initializing"), the boolean ``model_loaded``, and a fixed description.
    """
    model_ready = cosyvoice_model is not None
    if model_ready:
        status_label = "ok"
    else:
        status_label = "initializing"
    payload = {}
    payload["status"] = status_label
    payload["model_loaded"] = model_ready
    payload["description"] = "CosyVoice2 Korean TTS Server"
    return payload
# Start the server only when this file is executed directly.
if __name__ == "__main__":
    import uvicorn
    # Listen on all interfaces, port 7860.
    uvicorn.run(app, host="0.0.0.0", port=7860)