# server.py
#
# MCP server exposing ElevenLabs text-to-speech tools. Generated audio can
# optionally be uploaded, together with a JSON metadata file, to the Hugging
# Face Space repository configured below.
from __future__ import annotations

from typing import Optional, List, Dict, Any
from pathlib import Path
from urllib.parse import quote, quote_plus
from datetime import datetime
import os
import json
import uuid
import mimetypes

from fastmcp import FastMCP
from huggingface_hub import HfApi

from tts_core import get_eleven_client, ensure_output_dir, generate_and_save_audio
from voices import (
    load_voices_map,
    list_voices_data,
    resolve_voice,
    VOICES_MAP_PATH_DEFAULT,
)

# ====== Hugging Face Space config (where Gradio UI is running) ======
ONLINE_UI_BASE = "https://trung06042002-mcptts.hf.space"

# Space repository (matches the Space link above).
HF_SPACE_REPO_ID = "trung06042002/mcptts"
HF_REPO_TYPE = "space"

# Folders inside the Space repo that hold the audio files and their metadata.
HF_AUDIO_DIR = "audios"
HF_META_DIR = "meta"

# Name of the environment variable holding the upload token
# (create the token under HF settings/tokens with write permission).
HF_TOKEN_ENV = "HF_TOKEN"

mcp = FastMCP("elevenlabs-tts")


def _require_hf_token() -> str:
    """Return the Hugging Face token read from the ``HF_TOKEN_ENV`` variable.

    Raises:
        RuntimeError: If the variable is unset or empty.
    """
    token = os.getenv(HF_TOKEN_ENV)
    if not token:
        raise RuntimeError(
            f"Missing {HF_TOKEN_ENV}. Please export {HF_TOKEN_ENV}= "
            "before running the MCP server."
        )
    return token


def _safe_stem(text: str, max_len: int = 40) -> str:
    """Turn free text into a short, lowercase, underscore-separated stem.

    Every non-alphanumeric character becomes ``_``, runs of underscores are
    collapsed, and the result is cut to ``max_len`` characters. Falls back to
    ``"tts"`` when nothing usable remains.
    """
    s = "".join(c if c.isalnum() else "_" for c in text.strip().lower())
    s = "_".join([p for p in s.split("_") if p])
    # Truncation can leave a dangling separator, hence the final rstrip.
    return (s[:max_len] or "tts").rstrip("_")


def _guess_ext_from_format(output_format: str) -> str:
    """Derive a file extension from an output format string.

    The part before the first ``_`` is used (e.g. ``"mp3_44100_128"`` ->
    ``".mp3"``, ``"wav_44100"`` -> ``".wav"``). Unrecognised or empty formats
    fall back to ``".mp3"``.
    """
    head = (output_format or "").split("_", 1)[0].lower()
    if head in {"mp3", "wav", "ogg", "flac", "m4a"}:
        return f".{head}"
    # Fallback for anything we do not recognise.
    return ".mp3"


def _upload_file_to_space(local_path: Path, path_in_repo: str, commit_message: str) -> None:
    """Upload one local file to the configured Space repo as a single commit.

    Raises:
        RuntimeError: If the HF token environment variable is missing.
    """
    token = _require_hf_token()
    api = HfApi(token=token)
    api.upload_file(
        path_or_fileobj=str(local_path),
        path_in_repo=path_in_repo,
        repo_id=HF_SPACE_REPO_ID,
        repo_type=HF_REPO_TYPE,
        commit_message=commit_message,
    )


def upload_audio_and_meta(
    audio_path: str,
    *,
    text: str,
    voice_key: str,
    voice_label: str,
    voice_id: str,
    model_id: str,
    output_format: str,
) -> Dict[str, str]:
    """Upload an audio file plus a JSON metadata file to the HF Space repo.

    The audio is stored under a unique name (text stem, voice key, timestamp
    and a random suffix) so that earlier uploads are never overwritten. The
    metadata file is stored under the same name with ``.json`` appended.

    Args:
        audio_path: Path of the local audio file to upload.
        text: Text that was synthesised; stored in the metadata and used to
            derive the file name.
        voice_key: Voice key; stored in the metadata and used (sanitised) in
            the file name.
        voice_label: Display label of the voice; stored in the metadata.
        voice_id: Voice id; stored in the metadata.
        model_id: Model id; stored in the metadata.
        output_format: Output format string; stored in the metadata and used
            to pick an extension when ``audio_path`` has none.

    Returns:
        A dict with:
        - ``hf_audio_path``: path inside the repo (e.g. ``audios/xxx.mp3``)
        - ``hf_meta_path``: path inside the repo (e.g. ``meta/xxx.mp3.json``)
        - ``hf_audio_url``: URL of the file under ``ONLINE_UI_BASE``
        - ``ui_url``: URL of the online UI

    Raises:
        FileNotFoundError: If ``audio_path`` does not exist.
        RuntimeError: If the HF token environment variable is missing.
    """
    p = Path(audio_path)
    if not p.exists():
        raise FileNotFoundError(f"Audio file not found: {audio_path}")

    # Build a unique file name so uploads never overwrite each other.
    ext = p.suffix if p.suffix else _guess_ext_from_format(output_format)
    safe = _safe_stem(text)
    # voice_key becomes part of a repo path; restrict it to filename-safe
    # characters so it cannot introduce extra path segments.
    safe_voice = (
        "".join(c if (c.isalnum() or c in "-_") else "_" for c in voice_key)
        or "voice"
    )
    ts = datetime.now().strftime("%Y%m%d_%H%M%S")
    uid = uuid.uuid4().hex[:8]
    filename = f"{safe}_{safe_voice}_{ts}_{uid}{ext}"

    hf_audio_path = f"{HF_AUDIO_DIR}/{filename}"
    hf_meta_path = f"{HF_META_DIR}/{filename}.json"

    # Metadata describing the uploaded audio.
    size_bytes = p.stat().st_size
    mime = mimetypes.guess_type(filename)[0] or "application/octet-stream"
    meta = {
        "text": text,
        "voice_key": voice_key,
        "voice_label": voice_label,
        "voice_id": voice_id,
        "model_id": model_id,
        "output_format": output_format,
        "filename": filename,
        "size_bytes": size_bytes,
        "mime_type": mime,
        "created_at": ts,
    }

    # The metadata is staged in a temp file next to the audio file. The
    # try/finally guarantees the temp file is removed even when an upload
    # fails part-way through.
    tmp_meta = p.with_suffix(p.suffix + ".json.tmp")
    try:
        tmp_meta.write_text(
            json.dumps(meta, ensure_ascii=False, indent=2), encoding="utf-8"
        )

        # Upload audio.
        _upload_file_to_space(
            p,
            hf_audio_path,
            commit_message=f"Upload audio: {filename}",
        )

        # Upload metadata.
        _upload_file_to_space(
            tmp_meta,
            hf_meta_path,
            commit_message=f"Upload meta: {filename}.json",
        )
    finally:
        # Plain unlink + FileNotFoundError works on every Python version
        # (Path.unlink(missing_ok=...) only exists on 3.8+).
        try:
            tmp_meta.unlink()
        except FileNotFoundError:
            pass

    # Path-based URL of the file on hf.space (uses the /file/ route).
    # The stem may contain non-ASCII letters, so percent-encode the path.
    hf_audio_url = f"{ONLINE_UI_BASE}/file/{quote(hf_audio_path)}"

    # The UI only lists/plays/downloads; query params are no longer needed.
    ui_url = ONLINE_UI_BASE

    return {
        "hf_audio_path": hf_audio_path,
        "hf_meta_path": hf_meta_path,
        "hf_audio_url": hf_audio_url,
        "ui_url": ui_url,
    }


@mcp.tool
def list_voices(
    voices_map_path: str = VOICES_MAP_PATH_DEFAULT,
) -> List[Dict[str, Any]]:
    """
    List the available voices from voices.yaml.

    Returns a list of entries with:
    - key: key used in code (e.g. 'sarah')
    - voice_id: ElevenLabs voice id
    - label: display name
    """
    voices_map = load_voices_map(voices_map_path)
    return list_voices_data(voices_map)


@mcp.tool
def generate_tts(
    text: str,
    voices: Optional[List[str]] = None,
    voice: Optional[str] = None,  # a single voice
    model_id: str = "eleven_turbo_v2",
    output_dir: str = "./outputs",
    output_format: str = "mp3_44100_128",
    language_code: Optional[str] = None,
    env_path: str = ".env",
    voices_map_path: str = VOICES_MAP_PATH_DEFAULT,
    stability: float = 0.3,
    similarity_boost: float = 0.7,
    style: float = 0.8,
    use_speaker_boost: bool = True,
    speed: Optional[float] = None,
    upload_to_hf: bool = True,  # whether to upload the result to the HF Space
) -> Dict[str, Any]:
    """
    Generate one or more TTS files from text and (optionally) upload them to
    the Hugging Face Space so they can be played/downloaded online.

    - If NEITHER 'voices' NOR 'voice' is given:
        -> No audio is generated; returns:
           {
             "status": "need_voice_selection",
             "available_voices": [...],
             "message": "..."
           }

    - If 'voices' (list) is given -> generate for every listed voice.
    - If 'voice' (string) is given -> generate for that single voice.

    If upload_to_hf=True:
    - requires the HF_TOKEN env var (write token)
    - uploads the audio into the Space repo under audios/
    - uploads the metadata into the Space repo under meta/ (as <name>.json)
    - additionally returns hf_audio_url + ui_url

    Raises:
        ValueError: If a requested voice cannot be resolved to a voice_id.
        RuntimeError: If upload_to_hf=True and the HF token is missing.
    """
    voices_map = load_voices_map(voices_map_path)

    # Work out which voices were requested; 'voices' wins over 'voice'.
    if voices:
        requested: List[str] = list(voices)
    elif voice:
        requested = [voice]
    else:
        return {
            "status": "need_voice_selection",
            "message": (
                "No voice was specified. Please choose one or more voices from "
                "'available_voices' and call generate_tts again with the 'voices' "
                "parameter (or 'voice' for a single voice)."
            ),
            "available_voices": list_voices_data(voices_map),
        }

    # Resolve every voice before generating anything, so an invalid voice
    # late in the list cannot leave earlier audio generated and uploaded.
    resolved_voices: List[Dict[str, Any]] = []
    for v in requested:
        resolved = resolve_voice(v, voices_map)
        voice_key = resolved["voice_key"]
        if not resolved["voice_id"]:
            raise ValueError(
                f"Could not resolve voice '{voice_key}' to a valid voice_id."
            )
        resolved_voices.append(resolved)

    # Prepare the client and the local output directory.
    eleven = get_eleven_client(env_path)
    base_output_dir = ensure_output_dir(output_dir)

    # Fail before generating audio when the upload cannot succeed anyway.
    # NOTE(review): kept after get_eleven_client on the assumption that it may
    # load env_path into the process environment - confirm in tts_core.
    if upload_to_hf:
        _require_hf_token()

    voice_settings = {
        "stability": stability,
        "similarity_boost": similarity_boost,
        "style": style,
        "use_speaker_boost": use_speaker_boost,
    }
    if speed is not None:
        voice_settings["speed"] = speed

    results: List[Dict[str, Any]] = []

    for resolved in resolved_voices:
        voice_id = resolved["voice_id"]
        voice_key = resolved["voice_key"]
        voice_label = resolved["voice_label"]

        audio_path = generate_and_save_audio(
            eleven=eleven,
            text=text,
            voice_id=voice_id,
            model_id=model_id,
            output_dir=base_output_dir,
            output_format=output_format,
            language_code=language_code,
            voice_settings=voice_settings,
        )

        item: Dict[str, Any] = {
            "text": text,
            "voice_key": voice_key,
            "voice_label": voice_label,
            "voice_id": voice_id,
            "model_id": model_id,
            "output_format": output_format,
            "audio_path": audio_path,  # local path
            "ui_url": ONLINE_UI_BASE,  # online UI (list/play/download)
        }

        if upload_to_hf:
            uploaded = upload_audio_and_meta(
                audio_path,
                text=text,
                voice_key=voice_key,
                voice_label=voice_label,
                voice_id=voice_id,
                model_id=model_id,
                output_format=output_format,
            )
            item.update(uploaded)

        # Optional: keep a query-param link for easy sharing, even though the
        # UI itself does not need it.
        item["ui_url_with_params"] = (
            f"{ONLINE_UI_BASE}/"
            f"?text={quote_plus(text)}"
            f"&voice={quote_plus(voice_key)}"
        )

        results.append(item)

    return {
        "status": "ok",
        "audios": results,
    }


if __name__ == "__main__":
    # Recommendation: do not print anything extra to stdout, so that
    # Claude Desktop (STDIO) stays stable.
    mcp.run()