Spaces:
Paused
Paused
| import os | |
| import torch | |
| import torchaudio | |
| import tempfile | |
| from transformers import pipeline | |
| import time | |
| import warnings | |
class HuggingFaceTTS:
    """Text-to-speech helper built on a Hugging Face ``text-to-speech`` pipeline.

    Model loading is best-effort: when loading fails the instance is still
    created, ``available`` stays ``False`` and ``generate_speech`` returns
    ``False`` without attempting any synthesis.
    """

    def __init__(self, model_name="facebook/mms-tts-eng"):
        """Load the TTS pipeline for ``model_name``.

        Args:
            model_name: Hugging Face model id of the TTS model to use.
        """
        self.available = False
        self.model_name = model_name
        try:
            # Silence library warnings. NOTE: this installs a process-wide
            # filter, it is not limited to this instance.
            warnings.filterwarnings("ignore")

            # Keep downloaded weights in a dedicated cache directory.
            cache_dir = os.path.join(tempfile.gettempdir(), "hf_tts_cache")
            os.makedirs(cache_dir, exist_ok=True)
            print(f"[HF TTS] ๋ชจ๋ธ ๋ก๋ ์ค: {model_name}, ์บ์ ๋๋ ํ ๋ฆฌ: {cache_dir}")

            start_time = time.time()
            self.synthesizer = pipeline(
                "text-to-speech",
                model=model_name,
                # Loader options such as cache_dir must travel through
                # model_kwargs (forwarded to from_pretrained); a stray
                # pipeline-level keyword is not a supported way to set it.
                model_kwargs={"cache_dir": cache_dir},
                # First GPU when CUDA is present, otherwise CPU (-1).
                device=0 if torch.cuda.is_available() else -1,
            )
            elapsed = time.time() - start_time
            self.available = True
            print(f"[HF TTS] ๋ชจ๋ธ({model_name}) ๋ก๋ ์ฑ๊ณต! ({elapsed:.1f}์ด)")
        except Exception as e:
            # Deliberate best-effort: report the failure and stay unavailable.
            print(f"[HF TTS] ๋ชจ๋ธ({model_name}) ๋ก๋ ์คํจ: {e}")
            self.available = False

    @staticmethod
    def _write_wav(audio, sampling_rate, file_path):
        """Write ``audio`` to ``file_path`` as a 16-bit PCM WAV file.

        Args:
            audio: Array-like of float samples, nominally in ``[-1.0, 1.0]``.
                Size-1 axes (e.g. a leading batch axis) are dropped; a 2-D
                result is oriented so the shorter axis is the channel axis.
            sampling_rate: Sample rate in Hz stored in the WAV header.
            file_path: Destination path.

        Returns:
            int: Number of frames written.

        Raises:
            ValueError: If ``audio`` is empty or has more than two
                non-trivial dimensions.
        """
        # Local imports: the file's import header does not bring these in.
        import wave

        import numpy as np

        samples = np.squeeze(np.asarray(audio, dtype=np.float32))
        if samples.size == 0:
            raise ValueError("synthesizer returned no audio samples")
        if samples.ndim == 0:
            samples = samples.reshape(1)
        if samples.ndim == 1:
            samples = samples[:, np.newaxis]  # mono -> (frames, 1)
        elif samples.ndim == 2:
            if samples.shape[0] < samples.shape[1]:
                samples = samples.T  # (channels, frames) -> (frames, channels)
        else:
            raise ValueError(f"unsupported audio shape: {samples.shape}")

        # Clip before scaling so out-of-range floats cannot wrap around.
        pcm = (np.clip(samples, -1.0, 1.0) * 32767.0).astype("<i2")

        with open(file_path, "wb") as raw_file, wave.open(raw_file, "wb") as wav_file:
            wav_file.setnchannels(pcm.shape[1])
            wav_file.setsampwidth(2)  # 16-bit samples
            wav_file.setframerate(int(sampling_rate))
            wav_file.writeframes(np.ascontiguousarray(pcm).tobytes())
        return pcm.shape[0]

    def generate_speech(self, text: str, output_path: str) -> bool:
        """Synthesize ``text`` and save it to ``output_path`` as a WAV file.

        Empty or whitespace-only text is replaced by a fixed placeholder
        sentence, and text longer than 500 characters is cut to 497
        characters plus ``"..."`` before synthesis.

        Args:
            text: Text to convert to speech.
            output_path: Path of the audio file to write.

        Returns:
            bool: ``True`` when a non-empty file was written; ``False`` when
            the model is unavailable or any step failed (errors are printed,
            never raised).
        """
        if not self.available:
            return False
        try:
            # Validation 1: fall back to a placeholder for empty input.
            if not text or text.strip() == "":
                print("[HF TTS] ๊ฒฝ๊ณ : ๋น ํ ์คํธ๊ฐ ์ ๋ ฅ๋์์ต๋๋ค")
                text = "No text provided."

            # Validation 2: truncate overly long input.
            if len(text) > 500:
                print(f"[HF TTS] ํ ์คํธ๊ฐ ๋๋ฌด ๊น๋๋ค ({len(text)}์). 500์๋ก ์๋ผ๋ ๋๋ค.")
                text = text[:497] + "..."

            print(f"[HF TTS] ์์ฑ ์์ฑ ์์: '{text[:50]}...' (์ ์ฒด {len(text)}์)")

            # Run the model with warnings muted for the duration of the call.
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                speech = self.synthesizer(text)

            # The pipeline result carries a float sample array and its sample
            # rate; both are needed to produce a playable file, so encode a
            # real WAV instead of dumping the array's raw bytes.
            file_path = output_path
            self._write_wav(speech["audio"], speech["sampling_rate"], file_path)

            # Confirm the file really landed on disk.
            if os.path.exists(file_path) and os.path.getsize(file_path) > 0:
                print(f"[HF TTS] ์์ฑ ํ์ผ ์ ์ฅ ์ฑ๊ณต: {file_path}, ํฌ๊ธฐ: {os.path.getsize(file_path)} ๋ฐ์ดํธ")
                return True
            else:
                print(f"[HF TTS] ์ค๋ฅ: ํ์ผ์ด ์ ์ฅ๋์ง ์์๊ฑฐ๋ ๋น์ด ์์ต๋๋ค: {file_path}")
                return False
        except Exception as e:
            # Best-effort API: report and signal failure instead of raising.
            print(f"[HF TTS] ์์ฑ ์์ฑ ์ค ์ค๋ฅ ๋ฐ์: {e}")
            return False