# STT_TTS_chatbot/utils/hf_hub_tts.py
# Uploaded by: jeongsoo
# Commit 10c1a86: "Add application file"
import os
import torch
import tempfile
import time
from huggingface_hub import hf_hub_download
import torchaudio
class HFHubTTS:
    """Load a text-to-speech model from the Hugging Face Hub and synthesize audio.

    The model is wrapped in a ``transformers`` ``"text-to-speech"`` pipeline.
    Loading failures are reported on stdout and recorded in ``self.available``
    instead of being raised, so callers can fall back to another TTS backend.

    NOTE(review): the Korean log messages below look mis-encoded (probably
    UTF-8 text decoded with a Thai code page). They are kept exactly as found;
    confirm the intended text against the upstream file before changing them.
    """

    def __init__(self, model_id="facebook/mms-tts-eng"):
        """Initialize the TTS pipeline for a Hugging Face Hub model.

        Args:
            model_id: Hub model ID to load (default: "facebook/mms-tts-eng").
        """
        self.available = False
        self.model_id = model_id
        # Defined up front so the attribute exists even when loading fails.
        self.tts = None
        try:
            # Imported lazily so a missing/broken transformers install only
            # disables this backend instead of breaking module import.
            from transformers import pipeline

            print(f"[HF Hub TTS] ๋ชจ๋ธ ๋กœ๋“œ ์ค‘: {model_id}")
            start_time = time.time()
            # Load the model.
            self.tts = pipeline("text-to-speech", model=model_id)
            elapsed = time.time() - start_time
            self.available = True
            print(f"[HF Hub TTS] ๋ชจ๋ธ '{model_id}' ๋กœ๋“œ ์„ฑ๊ณต! ({elapsed:.1f}์ดˆ)")
        except Exception as e:
            print(f"[HF Hub TTS] ๋ชจ๋ธ '{model_id}' ๋กœ๋“œ ์‹คํŒจ: {e}")
            self.available = False

    def generate_speech(self, text, output_path):
        """Convert text to speech and save it to ``output_path``.

        Array output from the pipeline is encoded as a 16-bit PCM WAV file
        using the pipeline's reported sampling rate. Bytes-like output is
        written unchanged.

        Args:
            text: Text to synthesize.
            output_path: Path of the audio file to write.

        Returns:
            bool: True if a non-empty audio file was written, False otherwise
            (model unavailable, empty text, or any error during synthesis).
        """
        if not self.available:
            return False
        try:
            # Validate the text.
            if not text or len(text.strip()) == 0:
                print("[HF Hub TTS] ๋นˆ ํ…์ŠคํŠธ๊ฐ€ ์ž…๋ ฅ๋˜์—ˆ์Šต๋‹ˆ๋‹ค.")
                return False
            print(f"[HF Hub TTS] ์Œ์„ฑ ์ƒ์„ฑ ์‹œ์ž‘: '{text[:50]}{'...' if len(text) > 50 else ''}'")
            start_time = time.time()
            # Synthesize the audio.
            speech = self.tts(text)
            # Save the file. The pipeline result is a mapping with the
            # waveform under "audio" and its rate under "sampling_rate";
            # writing the array directly would produce a header-less,
            # unplayable dump of raw float samples.
            self._write_audio(speech["audio"], speech.get("sampling_rate"), output_path)
            elapsed = time.time() - start_time
            # Verify the result.
            if os.path.exists(output_path) and os.path.getsize(output_path) > 0:
                print(f"[HF Hub TTS] ์Œ์„ฑ ์ƒ์„ฑ ์„ฑ๊ณต: {output_path} ({elapsed:.1f}์ดˆ)")
                return True
            else:
                print("[HF Hub TTS] ์Œ์„ฑ ํŒŒ์ผ์ด ์ƒ์„ฑ๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค.")
                return False
        except Exception as e:
            print(f"[HF Hub TTS] ์Œ์„ฑ ์ƒ์„ฑ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {e}")
            return False

    @staticmethod
    def _write_audio(audio, sampling_rate, output_path):
        """Write synthesized audio to ``output_path``.

        Args:
            audio: Bytes-like encoded audio, or a waveform array shaped
                ``(samples,)``, ``(1, samples)`` or ``(channels, samples)``.
                Float samples are assumed to lie in [-1.0, 1.0]; values
                outside that range are clipped.
            sampling_rate: Sample rate in Hz; required for array input.
            output_path: Destination file path.

        Raises:
            ValueError: If the waveform is empty, has more than two
                dimensions, or the sampling rate is missing/non-positive.
        """
        # Already-encoded payloads are written as-is, which keeps the original
        # behaviour for any backend that returns bytes.
        if isinstance(audio, (bytes, bytearray, memoryview)):
            with open(output_path, "wb") as raw_file:
                raw_file.write(audio)
            return

        import wave

        import numpy as np

        if sampling_rate is None or int(sampling_rate) <= 0:
            raise ValueError(f"invalid sampling rate: {sampling_rate!r}")

        samples = np.squeeze(np.asarray(audio, dtype=np.float32))
        if samples.ndim == 0:
            samples = samples.reshape(1)
        if samples.size == 0:
            raise ValueError("TTS pipeline returned empty audio")
        if samples.ndim == 1:
            channels = 1
        elif samples.ndim == 2:
            # WAV frames interleave channels, so the layout must be
            # (samples, channels). The shorter axis is assumed to be the
            # channel axis.
            if samples.shape[0] < samples.shape[1]:
                samples = samples.T
            channels = samples.shape[1]
        else:
            raise ValueError(f"unsupported audio shape: {samples.shape}")

        # Scale to little-endian signed 16-bit PCM, the sample format WAV
        # files use for a 2-byte sample width.
        pcm = (np.clip(samples, -1.0, 1.0) * 32767.0).astype("<i2")

        # Open the file ourselves so any path type accepted by open() works;
        # the wave writer patches the header when it is closed, before the
        # underlying file is closed.
        with open(output_path, "wb") as raw_file, wave.open(raw_file, "wb") as wav_file:
            wav_file.setnchannels(channels)
            wav_file.setsampwidth(2)
            wav_file.setframerate(int(sampling_rate))
            wav_file.writeframes(np.ascontiguousarray(pcm).tobytes())