# STT_TTS_chatbot / utils / huggingface_tts.py
# jeongsoo's picture
# Add application file
# 5688789
import os
import tempfile
import time
import warnings
import wave

import torch
import torchaudio
from transformers import pipeline
class HuggingFaceTTS:
    """Text-to-speech helper built on a Hugging Face ``text-to-speech`` pipeline.

    The model is loaded once in ``__init__``. Loading failures are caught and
    reported through ``self.available`` instead of being raised, so callers
    can check that flag (or simply rely on ``generate_speech`` returning
    ``False``).
    """

    def __init__(self, model_name="facebook/mms-tts-eng"):
        """
        Load the TTS pipeline for ``model_name``.

        Args:
            model_name: Name of the Hugging Face TTS model to load.
        """
        self.available = False
        self.model_name = model_name
        # Defined up front so the attribute exists even when loading fails.
        self.synthesizer = None

        try:
            # Silence library warnings.
            # NOTE: this installs a process-wide "ignore" filter, not one
            # scoped to this instance.
            warnings.filterwarnings("ignore")

            # Keep downloaded model files in a dedicated directory under the
            # system temp directory.
            cache_dir = os.path.join(tempfile.gettempdir(), "hf_tts_cache")
            os.makedirs(cache_dir, exist_ok=True)

            print(f"[HF TTS] ๋ชจ๋ธ ๋กœ๋“œ ์ค‘: {model_name}, ์บ์‹œ ๋””๋ ‰ํ† ๋ฆฌ: {cache_dir}")

            # Load the model on the first CUDA device when one is available,
            # otherwise on the CPU (device=-1).
            # NOTE(review): cache_dir is forwarded as an extra pipeline
            # keyword - confirm the installed transformers version honours it.
            start_time = time.time()
            self.synthesizer = pipeline(
                "text-to-speech",
                model=model_name,
                cache_dir=cache_dir,
                device=0 if torch.cuda.is_available() else -1
            )
            elapsed = time.time() - start_time

            self.available = True
            print(f"[HF TTS] ๋ชจ๋ธ({model_name}) ๋กœ๋“œ ์„ฑ๊ณต! ({elapsed:.1f}์ดˆ)")
        except Exception as e:
            # Best-effort construction: report the failure and leave the
            # instance in the "unavailable" state.
            print(f"[HF TTS] ๋ชจ๋ธ({model_name}) ๋กœ๋“œ ์‹คํŒจ: {e}")
            self.available = False

    @staticmethod
    def _write_wav(file_path, audio, sampling_rate):
        """Write float audio samples to ``file_path`` as a mono 16-bit PCM WAV.

        ``audio`` may be anything ``torch.as_tensor`` accepts (NumPy array,
        tensor, nested list). It is flattened to one channel, so shapes such
        as ``(N,)`` and ``(1, N)`` are both handled; samples are assumed to be
        floats nominally in ``[-1.0, 1.0]`` and are clipped to that range.
        """
        samples = torch.as_tensor(audio).detach().cpu().flatten().to(torch.float32)
        # Replace NaN/inf before scaling so the int16 cast is well defined.
        samples = torch.nan_to_num(samples, nan=0.0, posinf=1.0, neginf=-1.0)
        pcm = (samples.clamp(-1.0, 1.0) * 32767.0).round().to(torch.int16)

        # Open the file ourselves so any path type accepted by open() works;
        # the wave writer only adds the RIFF/WAVE header around the frames.
        with open(file_path, "wb") as raw_file:
            with wave.open(raw_file, "wb") as wav_file:
                wav_file.setnchannels(1)
                wav_file.setsampwidth(2)  # 2 bytes per sample = 16-bit PCM
                wav_file.setframerate(int(sampling_rate))
                wav_file.writeframes(pcm.numpy().tobytes())

    def generate_speech(self, text, output_path):
        """
        Convert ``text`` to speech and save it to ``output_path`` as a WAV file.

        Args:
            text: Text to synthesize. Empty or whitespace-only text is replaced
                with a placeholder sentence; text longer than 500 characters
                is truncated to 497 characters plus "...".
            output_path: Path of the audio file to write.

        Returns:
            bool: True when a non-empty file was written, False when the model
            is unavailable or any error occurred (errors are printed, not
            raised).
        """
        if not self.available:
            return False

        try:
            # Validation 1: fall back to a placeholder for empty text.
            if not text or text.strip() == "":
                print("[HF TTS] ๊ฒฝ๊ณ : ๋นˆ ํ…์ŠคํŠธ๊ฐ€ ์ž…๋ ฅ๋˜์—ˆ์Šต๋‹ˆ๋‹ค")
                text = "No text provided."

            # Validation 2: truncate overly long text.
            if len(text) > 500:
                print(f"[HF TTS] ํ…์ŠคํŠธ๊ฐ€ ๋„ˆ๋ฌด ๊น๋‹ˆ๋‹ค ({len(text)}์ž). 500์ž๋กœ ์ž˜๋ผ๋ƒ…๋‹ˆ๋‹ค.")
                text = text[:497] + "..."

            print(f"[HF TTS] ์Œ์„ฑ ์ƒ์„ฑ ์‹œ์ž‘: '{text[:50]}...' (์ „์ฒด {len(text)}์ž)")

            # Run synthesis with warnings suppressed for this call only.
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                speech = self.synthesizer(text)

            # The pipeline result carries raw float samples plus their sample
            # rate; writing the array bytes directly would produce a headerless
            # file that audio players cannot open, so encode a proper WAV.
            file_path = output_path
            self._write_wav(file_path, speech["audio"], speech["sampling_rate"])

            # Confirm the file really exists and is not empty.
            if os.path.exists(file_path) and os.path.getsize(file_path) > 0:
                print(f"[HF TTS] ์Œ์„ฑ ํŒŒ์ผ ์ €์žฅ ์„ฑ๊ณต: {file_path}, ํฌ๊ธฐ: {os.path.getsize(file_path)} ๋ฐ”์ดํŠธ")
                return True
            else:
                print(f"[HF TTS] ์˜ค๋ฅ˜: ํŒŒ์ผ์ด ์ €์žฅ๋˜์ง€ ์•Š์•˜๊ฑฐ๋‚˜ ๋น„์–ด ์žˆ์Šต๋‹ˆ๋‹ค: {file_path}")
                return False

        except Exception as e:
            # Best-effort API: report the problem and signal failure.
            print(f"[HF TTS] ์Œ์„ฑ ์ƒ์„ฑ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {e}")
            return False