Spaces:

trung06042002
/

mcptts

Sleeping

File size: 9,411 Bytes

# server.py
from __future__ import annotations

from typing import Optional, List, Dict, Any
from pathlib import Path
from urllib.parse import quote_plus
from datetime import datetime
import os
import json
import uuid
import mimetypes

from fastmcp import FastMCP
from huggingface_hub import HfApi

from tts_core import get_eleven_client, ensure_output_dir, generate_and_save_audio
from voices import (
    load_voices_map,
    list_voices_data,
    resolve_voice,
    VOICES_MAP_PATH_DEFAULT,
)

# ====== Hugging Face Space config (where Gradio UI is running) ======
ONLINE_UI_BASE = "https://trung06042002-mcptts.hf.space"

# Your Space repo (matches the Space link above)
HF_SPACE_REPO_ID = "trung06042002/mcptts"
HF_REPO_TYPE = "space"

# Folders inside the Space repo that hold the audio files and their metadata
HF_AUDIO_DIR = "audios"
HF_META_DIR = "meta"

# Env var holding the upload token (create it under HF settings/tokens, with write permission)
HF_TOKEN_ENV = "HF_TOKEN"

mcp = FastMCP("elevenlabs-tts")


def _require_hf_token() -> str:
    """Return the Hugging Face token stored in the ``HF_TOKEN_ENV`` variable.

    Raises:
        RuntimeError: If the environment variable is unset or empty.
    """
    value = os.getenv(HF_TOKEN_ENV)
    if value:
        return value
    raise RuntimeError(
        f"Missing {HF_TOKEN_ENV}. Please export {HF_TOKEN_ENV}=<your_hf_write_token> "
        "before running the MCP server."
    )


def _safe_stem(text: str, max_len: int = 40) -> str:
    s = "".join(c if c.isalnum() else "_" for c in text.strip().lower())
    s = "_".join([p for p in s.split("_") if p])
    return (s[:max_len] or "tts").rstrip("_")


def _guess_ext_from_format(output_format: str) -> str:
    # output_format examples: "mp3_44100_128", "wav_44100"
    head = (output_format or "").split("_", 1)[0].lower()
    if head in {"mp3", "wav", "ogg", "flac", "m4a"}:
        return f".{head}"
    # fallback
    return ".mp3"


def _upload_file_to_space(local_path: Path, path_in_repo: str, commit_message: str) -> None:
    """Upload one local file to the configured Hugging Face Space repo.

    Args:
        local_path: File on disk to upload.
        path_in_repo: Destination path inside the repo.
        commit_message: Message for the commit created by the upload.

    Raises:
        RuntimeError: If the token environment variable is missing
            (raised by ``_require_hf_token``).
    """
    client = HfApi(token=_require_hf_token())
    upload_kwargs = {
        "path_or_fileobj": str(local_path),
        "path_in_repo": path_in_repo,
        "repo_id": HF_SPACE_REPO_ID,
        "repo_type": HF_REPO_TYPE,
        "commit_message": commit_message,
    }
    client.upload_file(**upload_kwargs)


def upload_audio_and_meta(
    audio_path: str,
    *,
    text: str,
    voice_key: str,
    voice_label: str,
    voice_id: str,
    model_id: str,
    output_format: str,
) -> Dict[str, str]:
    """
    Upload an audio file and a metadata JSON describing it to the HF Space repo.

    Args:
        audio_path: Local path of the generated audio file.
        text: Text that was synthesised (stored in the metadata and used to
            build the remote file name).
        voice_key: Voice key; stored in the metadata and embedded in the file name.
        voice_label: Display label of the voice (metadata only).
        voice_id: Voice id (metadata only).
        model_id: Model id (metadata only).
        output_format: Output format string; used to guess the extension when
            ``audio_path`` has none.

    Returns:
        A dict with:
          - hf_audio_path: path inside the repo (e.g. audios/xxx.mp3)
          - hf_meta_path:  path inside the repo (e.g. meta/xxx.mp3.json)
          - hf_audio_url:  percent-encoded URL of the file on hf.space
          - ui_url:        URL of the online UI

    Raises:
        FileNotFoundError: If ``audio_path`` does not exist.
        RuntimeError: If the upload token is missing (from ``_require_hf_token``).
    """
    # Local import keeps this block self-contained; the module only imports quote_plus.
    from urllib.parse import quote

    p = Path(audio_path)
    if not p.exists():
        raise FileNotFoundError(f"Audio file not found: {audio_path}")

    # Build a unique remote file name (timestamp + random suffix) to avoid overwrites.
    ext = p.suffix or _guess_ext_from_format(output_format)
    safe = _safe_stem(text)
    ts = datetime.now().strftime("%Y%m%d_%H%M%S")
    uid = uuid.uuid4().hex[:8]
    filename = f"{safe}_{voice_key}_{ts}_{uid}{ext}"

    hf_audio_path = f"{HF_AUDIO_DIR}/{filename}"
    hf_meta_path = f"{HF_META_DIR}/{filename}.json"

    # Metadata stored next to the audio in the repo.
    meta = {
        "text": text,
        "voice_key": voice_key,
        "voice_label": voice_label,
        "voice_id": voice_id,
        "model_id": model_id,
        "output_format": output_format,
        "filename": filename,
        "size_bytes": p.stat().st_size,
        "mime_type": mimetypes.guess_type(filename)[0] or "application/octet-stream",
        "created_at": ts,
    }

    tmp_meta = p.with_suffix(p.suffix + ".json.tmp")
    try:
        tmp_meta.write_text(json.dumps(meta, ensure_ascii=False, indent=2), encoding="utf-8")

        _upload_file_to_space(
            p,
            hf_audio_path,
            commit_message=f"Upload audio: {filename}",
        )
        _upload_file_to_space(
            tmp_meta,
            hf_meta_path,
            commit_message=f"Upload meta: {filename}.json",
        )
    finally:
        # Always remove the temporary metadata file, even when an upload fails,
        # so failed runs do not leave *.json.tmp files in the output directory.
        try:
            tmp_meta.unlink()
        except FileNotFoundError:
            pass

    # The file name may contain non-ASCII letters (kept by _safe_stem) or other
    # characters from voice_key, so percent-encode the path part of the URL.
    # NOTE(review): assumes the Space serves repo files under /file/<path_in_repo> — confirm.
    hf_audio_url = f"{ONLINE_UI_BASE}/file/{quote(hf_audio_path)}"
    ui_url = ONLINE_UI_BASE  # the UI only lists/plays/downloads; no query params needed

    return {
        "hf_audio_path": hf_audio_path,
        "hf_meta_path": hf_meta_path,
        "hf_audio_url": hf_audio_url,
        "ui_url": ui_url,
    }


@mcp.tool
def list_voices(
    voices_map_path: str = VOICES_MAP_PATH_DEFAULT,
) -> List[Dict[str, Any]]:
    """
    List the voices available in voices.yaml.

    Returns a list of entries with:
    - key: key used in code (e.g. 'sarah')
    - voice_id: ElevenLabs voice id
    - label: display name
    """
    return list_voices_data(load_voices_map(voices_map_path))


@mcp.tool
def generate_tts(
    text: str,
    voices: Optional[List[str]] = None,
    voice: Optional[str] = None,  # a single voice
    model_id: str = "eleven_turbo_v2",
    output_dir: str = "./outputs",
    output_format: str = "mp3_44100_128",
    language_code: Optional[str] = None,
    env_path: str = ".env",
    voices_map_path: str = VOICES_MAP_PATH_DEFAULT,
    stability: float = 0.3,
    similarity_boost: float = 0.7,
    style: float = 0.8,
    use_speaker_boost: bool = True,
    speed: Optional[float] = None,
    upload_to_hf: bool = True,  # whether to upload the result to the HF Space
) -> Dict[str, Any]:
    """
    Generate one or more TTS files from text and (optionally) upload them to the
    Hugging Face Space so they can be played/downloaded online.

    - If neither 'voices' nor 'voice' is given:
        -> no audio is generated; returns
           {
             "status": "need_voice_selection",
             "available_voices": [...],
             "message": "..."
           }

    - If 'voices' (list) is given -> generate for every listed voice
      ('voice' is ignored in that case).
    - If only 'voice' (string) is given -> generate for that single voice.

    Every requested voice is resolved before any audio is generated, so an
    unknown voice fails the call up front instead of after earlier voices
    have already been synthesised and uploaded.

    If upload_to_hf=True:
      - requires the HF_TOKEN env var (write token)
      - uploads the audio to the Space repo: audios/<file>
      - uploads the metadata to the Space repo: meta/<file>.json
      - adds hf_audio_url + ui_url to each result

    Returns:
        {"status": "ok", "audios": [...]} with one entry per requested voice.

    Raises:
        ValueError: If a requested voice does not resolve to a voice_id.
    """
    voices_map = load_voices_map(voices_map_path)

    # Determine which voices were requested; a non-empty 'voices' wins over 'voice'.
    if voices:
        requested: List[str] = list(voices)
    elif voice:
        requested = [voice]
    else:
        return {
            "status": "need_voice_selection",
            "message": (
                "No voice was specified. Please choose one or more voices from "
                "'available_voices' and call generate_tts again with the 'voices' "
                "parameter (or 'voice' for a single voice)."
            ),
            "available_voices": list_voices_data(voices_map),
        }

    # Fail fast: validate all voices before creating the client or spending any
    # TTS calls, so a bad entry cannot leave partial results behind.
    resolved_voices: List[Dict[str, Any]] = []
    for v in requested:
        resolved = resolve_voice(v, voices_map)
        voice_id = resolved["voice_id"]
        voice_key = resolved["voice_key"]
        voice_label = resolved["voice_label"]

        if not voice_id:
            raise ValueError(
                f"Could not resolve voice '{voice_key}' to a valid voice_id."
            )
        resolved_voices.append(
            {"voice_id": voice_id, "voice_key": voice_key, "voice_label": voice_label}
        )

    # Prepare the client and the local output directory.
    eleven = get_eleven_client(env_path)
    base_output_dir = ensure_output_dir(output_dir)

    voice_settings: Dict[str, Any] = {
        "stability": stability,
        "similarity_boost": similarity_boost,
        "style": style,
        "use_speaker_boost": use_speaker_boost,
    }
    # 'speed' is only sent when the caller set it explicitly.
    if speed is not None:
        voice_settings["speed"] = speed

    results: List[Dict[str, Any]] = []

    for resolved in resolved_voices:
        voice_id = resolved["voice_id"]
        voice_key = resolved["voice_key"]
        voice_label = resolved["voice_label"]

        audio_path = generate_and_save_audio(
            eleven=eleven,
            text=text,
            voice_id=voice_id,
            model_id=model_id,
            output_dir=base_output_dir,
            output_format=output_format,
            language_code=language_code,
            voice_settings=voice_settings,
        )

        item: Dict[str, Any] = {
            "text": text,
            "voice_key": voice_key,
            "voice_label": voice_label,
            "voice_id": voice_id,
            "model_id": model_id,
            "output_format": output_format,
            "audio_path": audio_path,        # local path
            "ui_url": ONLINE_UI_BASE,        # online UI (list/play/download)
        }

        if upload_to_hf:
            uploaded = upload_audio_and_meta(
                audio_path,
                text=text,
                voice_key=voice_key,
                voice_label=voice_label,
                voice_id=voice_id,
                model_id=model_id,
                output_format=output_format,
            )
            item.update(uploaded)

        # Optional: keep a query-param link for easy sharing, even though the UI
        # does not need it.
        item["ui_url_with_params"] = (
            f"{ONLINE_UI_BASE}/"
            f"?text={quote_plus(text)}"
            f"&voice={quote_plus(voice_key)}"
        )

        results.append(item)

    return {
        "status": "ok",
        "audios": results,
    }


if __name__ == "__main__":
    # Recommendation: do not print anything extra to stdout, so Claude Desktop (STDIO) stays stable.
    mcp.run()