Spaces:

trung06042002
/

mcptts

Sleeping

App Files Files Community

Nguyen Trung committed on Jan 12

Commit

cee428a

1 Parent(s): 9e4d16b

Update server and requirements

Browse files

Files changed (13) hide show

.DS_Store +0 -0
.env.example +1 -0
.gitignore +12 -0
.python-version +1 -0
__pycache__/tts_core.cpython-311.pyc +0 -0
__pycache__/voices.cpython-311.pyc +0 -0
pyproject.toml +13 -0
requirements.txt +4 -0
server.py +150 -0
tts_core.py +85 -0
uv.lock +0 -0
voices.py +60 -0
voices.yaml +58 -0

.DS_Store ADDED Viewed

Binary file (6.15 kB). View file

.env.example ADDED Viewed

	@@ -0,0 +1 @@


1	+ ELEVENLABS_API_KEY=your_real_api_key_here

.gitignore ADDED Viewed

	@@ -0,0 +1,12 @@

+.env
+__pycache__/
+.venv
+# macOS
+.DS_Store
+# outputs
+outputs/
+*.mp3
+*.wav

.python-version ADDED Viewed

	@@ -0,0 +1 @@


1	+ 3.11

__pycache__/tts_core.cpython-311.pyc ADDED Viewed

Binary file (4.39 kB). View file

__pycache__/voices.cpython-311.pyc ADDED Viewed

Binary file (2.87 kB). View file

pyproject.toml ADDED Viewed

	@@ -0,0 +1,13 @@

+[project]
+name = "mcp-tts"
+version = "0.1.0"
+description = "Add your description here"
+readme = "README.md"
+requires-python = ">=3.11"
+dependencies = [
+    "elevenlabs>=2.25.0",
+    "fastmcp>=2.13.3",
+    "numpy>=2.3.5",
+    "python-dotenv>=1.2.1",
+    "pyyaml>=6.0.3",
+]

requirements.txt ADDED Viewed

	@@ -0,0 +1,4 @@

+elevenlabs>=2.25.0
+python-dotenv>=1.2.1
+pyyaml>=6.0.3
+numpy>=2.3.5

server.py ADDED Viewed

	@@ -0,0 +1,150 @@

+# server.py
+from typing import Optional, List, Dict, Any
+from urllib.parse import quote_plus
+from fastmcp import FastMCP
+from tts_core import get_eleven_client, ensure_output_dir, generate_and_save_audio
+from voices import (
+    load_voices_map,
+    list_voices_data,
+    resolve_voice,
+    VOICES_MAP_PATH_DEFAULT,
+)
+# Base URL of the hosted web UI; generate_tts appends "?text=...&voice=..." to it
+# to build the "ui_url" field of each result.
+ONLINE_UI_BASE = "https://trung06042002-mcptts.hf.space"
+# FastMCP server instance named "elevenlabs-tts"; functions decorated with
+# @mcp.tool are registered on it as tools.
+mcp = FastMCP("elevenlabs-tts")
+@mcp.tool
+def list_voices(
+    voices_map_path: str = VOICES_MAP_PATH_DEFAULT,
+) -> List[Dict[str, Any]]:
+    """
+    List the voices available in the voices map file (voices.yaml by default).
+    Returns a list with one dict per voice:
+    - key: the key used in code (e.g. 'sarah')
+    - voice_id: the ElevenLabs voice ID
+    - label: the display name
+    """
+    return list_voices_data(load_voices_map(voices_map_path))
+@mcp.tool
+def generate_tts(
+    text: str,
+    voices: Optional[List[str]] = None,
+    voice: Optional[str] = None,  # a single voice
+    model_id: str = "eleven_turbo_v2",
+    output_dir: str = "./outputs",
+    output_format: str = "mp3_44100_128",
+    language_code: Optional[str] = None,
+    env_path: str = ".env",
+    voices_map_path: str = VOICES_MAP_PATH_DEFAULT,
+    stability: float = 0.3,
+    similarity_boost: float = 0.7,
+    style: float = 0.8,
+    use_speaker_boost: bool = True,
+    speed: Optional[float] = None,
+) -> Dict[str, Any]:
+    """
+    Generate one or more TTS audio files from text.
+    - If NEITHER 'voices' NOR 'voice' is given:
+        -> no audio is generated; returns
+           {
+             "status": "need_voice_selection",
+             "available_voices": [...],
+             "message": "..."
+           }
+    - If 'voices' (list) is given -> generate audio for every listed voice.
+    - If 'voice' (string) is given -> generate audio for that single voice.
+    - If both are given, 'voices' takes precedence and 'voice' is ignored.
+    Each requested voice may be a key from the voices map or a raw ElevenLabs
+    voice_id. Requests that resolve to the same voice_id are generated once,
+    because they would be written to the same output file.
+    Returns {"status": "ok", "audios": [...]} with one entry per generated
+    file (text, voice_key, voice_label, voice_id, model_id, output_format,
+    audio_path, ui_url).
+    Raises ValueError if 'text' is blank or if any requested voice cannot be
+    resolved to a voice_id; in both cases nothing is generated.
+    """
+    voices_map = load_voices_map(voices_map_path)
+    # Work out which voices were requested.
+    if voices:
+        requested: List[str] = list(voices)
+    elif voice:
+        requested = [voice]
+    else:
+        # Neither 'voices' nor 'voice' -> ask the caller to choose.
+        return {
+            "status": "need_voice_selection",
+            "message": (
+                "No voice was specified. Please choose one or more voices from "
+                "'available_voices' and call generate_tts again with the 'voices' "
+                "parameter (or 'voice' for a single voice)."
+            ),
+            "available_voices": list_voices_data(voices_map),
+        }
+    if not text or not text.strip():
+        raise ValueError("'text' must be a non-empty string.")
+    # Resolve and validate EVERY voice before the first API call, so a bad
+    # entry late in the list cannot fail the request after earlier voices
+    # have already been generated (and billed).
+    resolved_voices: List[Dict[str, Any]] = []
+    seen_voice_ids = set()
+    for v in requested:
+        resolved = resolve_voice(v, voices_map)
+        voice_id = resolved["voice_id"]
+        voice_key = resolved["voice_key"]
+        if not voice_id:
+            raise ValueError(
+                f"Could not resolve voice '{voice_key}' to a valid voice_id."
+            )
+        # The output file name is derived from voice_id, so a repeated
+        # voice_id would only overwrite the file produced a moment earlier.
+        if voice_id in seen_voice_ids:
+            continue
+        seen_voice_ids.add(voice_id)
+        resolved_voices.append(resolved)
+    # Prepare the client and the output directory.
+    eleven = get_eleven_client(env_path)
+    base_output_dir = ensure_output_dir(output_dir)
+    voice_settings = {
+        "stability": stability,
+        "similarity_boost": similarity_boost,
+        "style": style,
+        "use_speaker_boost": use_speaker_boost,
+    }
+    # 'speed' is only sent when the caller set it explicitly.
+    if speed is not None:
+        voice_settings["speed"] = speed
+    results: List[Dict[str, Any]] = []
+    for resolved in resolved_voices:
+        voice_id = resolved["voice_id"]
+        voice_key = resolved["voice_key"]
+        voice_label = resolved["voice_label"]
+        audio_path = generate_and_save_audio(
+            eleven=eleven,
+            text=text,
+            voice_id=voice_id,
+            model_id=model_id,
+            output_dir=base_output_dir,
+            output_format=output_format,
+            language_code=language_code,
+            voice_settings=voice_settings,
+        )
+        # Link to the hosted UI pre-filled with the same text and voice key.
+        ui_url = (
+            f"{ONLINE_UI_BASE}/"
+            f"?text={quote_plus(text)}"
+            f"&voice={quote_plus(voice_key)}"
+        )
+        results.append(
+            {
+                "text": text,
+                "voice_key": voice_key,
+                "voice_label": voice_label,
+                "voice_id": voice_id,
+                "model_id": model_id,
+                "output_format": output_format,
+                "audio_path": audio_path,
+                "ui_url": ui_url,
+            }
+        )
+    return {
+        "status": "ok",
+        "audios": results,
+    }
+# Start the MCP server only when this file is executed as a script, not when
+# it is imported as a module.
+if __name__ == "__main__":
+    mcp.run()

tts_core.py ADDED Viewed

	@@ -0,0 +1,85 @@

+# tts_core.py
+import os
+import re
+from typing import Optional, Dict, Any
+from dotenv import load_dotenv
+from elevenlabs.client import ElevenLabs
+def safe_filename(name: str) -> str:
+    """
+    Convert arbitrary text into a safe, lowercase ASCII file/folder name.
+    Accented letters are folded to their base letter ('chào' -> 'chao',
+    'Đường' -> 'duong') instead of being deleted, runs of whitespace become a
+    single underscore, and every remaining character outside [a-z0-9_-] is
+    dropped. Returns 'audio' when nothing usable is left.
+    """
+    import unicodedata  # local import: only this function needs it
+    name = name.strip().lower()
+    # 'đ' has no Unicode decomposition, so NFKD alone would not map it to 'd'.
+    name = name.replace("đ", "d")
+    # Split accented letters into base letter + combining marks, then drop the
+    # marks so the ASCII filter below keeps the base letter.
+    name = unicodedata.normalize("NFKD", name)
+    name = "".join(ch for ch in name if not unicodedata.combining(ch))
+    name = re.sub(r"\s+", "_", name)
+    name = re.sub(r"[^a-z0-9_\-]", "", name)
+    return name or "audio"
+def ensure_output_dir(base_output_dir: str) -> str:
+    """
+    Normalize the output directory, create it if needed, and return it.
+    A path containing '/opt/ml/processing/' (the SageMaker processing
+    location) is used unchanged; any other path gets 'tts/elevenlabs'
+    appended.
+    """
+    is_sagemaker_path = "/opt/ml/processing/" in base_output_dir
+    target_dir = (
+        base_output_dir
+        if is_sagemaker_path
+        else os.path.join(base_output_dir, "tts", "elevenlabs")
+    )
+    os.makedirs(target_dir, exist_ok=True)
+    return target_dir
+def get_eleven_client(env_path: str = ".env") -> ElevenLabs:
+    """
+    Load environment variables from env_path (if that file exists) and build
+    an ElevenLabs client from ELEVENLABS_API_KEY.
+    When env_path does not exist, the variables already present in the
+    process environment are used.
+    Raises ValueError if ELEVENLABS_API_KEY is unset or empty.
+    """
+    import sys  # local import: only needed for the stderr diagnostic
+    if os.path.exists(env_path):
+        load_dotenv(env_path)
+    else:
+        # Log to stderr, never stdout: when the MCP server runs over stdio,
+        # stdout carries the protocol messages and stray text corrupts them.
+        print(
+            f"[env] .env not found at {env_path}, using current environment vars.",
+            file=sys.stderr,
+        )
+    api_key = os.getenv("ELEVENLABS_API_KEY")
+    if not api_key:
+        raise ValueError("Missing ELEVENLABS_API_KEY in environment or .env")
+    return ElevenLabs(api_key=api_key)
+def generate_and_save_audio(
+    eleven: ElevenLabs,
+    *,
+    text: str,
+    voice_id: str,
+    model_id: str,
+    output_dir: str,
+    output_format: str,
+    language_code: Optional[str],
+    voice_settings: Dict[str, Any],
+) -> str:
+    """
+    Call ElevenLabs text-to-speech and write the audio to a file.
+    The file is written to
+    <output_dir>/<safe_filename(first 30 chars of text)>/<voice_id>.<ext>,
+    where <ext> is the part of output_format before the first underscore
+    (e.g. 'mp3' for 'mp3_44100_128'). An existing file at that path is
+    overwritten.
+    Characters that are not safe in a file name are replaced in voice_id and
+    removed from <ext>, so caller-supplied values cannot introduce path
+    separators.
+    Returns the path of the written file. If writing the stream fails, the
+    partially written file is removed and the exception is re-raised.
+    """
+    import sys  # local import: only needed for the stderr diagnostics
+    subfolder = safe_filename(text[:30])
+    # Both values may come straight from the tool caller; keep them from
+    # escaping out_dir via '/' or '..'.
+    ext = re.sub(r"[^A-Za-z0-9]", "", output_format.split("_")[0]) or "bin"
+    file_stem = re.sub(r"[^A-Za-z0-9_\-]", "_", voice_id) or "voice"
+    out_dir = os.path.join(output_dir, subfolder)
+    os.makedirs(out_dir, exist_ok=True)
+    output_path = os.path.join(out_dir, f"{file_stem}.{ext}")
+    # Log to stderr, never stdout: when the MCP server runs over stdio,
+    # stdout carries the protocol messages and stray text corrupts them.
+    print(
+        f"[tts] Generating: voice={voice_id}, model={model_id}, format={output_format}",
+        file=sys.stderr,
+    )
+    print(
+        f"[tts] Text: {text[:80]}{'...' if len(text) > 80 else ''}",
+        file=sys.stderr,
+    )
+    audio_stream = eleven.text_to_speech.convert(
+        text=text,
+        voice_id=voice_id,
+        model_id=model_id,
+        output_format=output_format,
+        voice_settings=voice_settings,
+        language_code=language_code,
+        apply_text_normalization="auto",
+        apply_language_text_normalization=False,
+    )
+    try:
+        with open(output_path, "wb") as f:
+            for chunk in audio_stream:
+                f.write(chunk)
+    except Exception:
+        # Do not leave a truncated file that looks like valid output.
+        if os.path.exists(output_path):
+            os.remove(output_path)
+        raise
+    print(f"[tts] Audio saved to: {output_path}", file=sys.stderr)
+    return output_path

uv.lock ADDED Viewed

The diff for this file is too large to render. See raw diff

voices.py ADDED Viewed

	@@ -0,0 +1,60 @@

+# voices.py
+import os
+from typing import Dict, Any, List
+import yaml
+# Default location of the voices map, relative to the working directory.
+VOICES_MAP_PATH_DEFAULT = "voices.yaml"
+def load_voices_map(path: str = VOICES_MAP_PATH_DEFAULT) -> Dict[str, Dict[str, Any]]:
+    """
+    Load the voices map from a YAML file.
+    Returns the mapping stored under the top-level 'voices' key, i.e.
+    dict[voice_key] = {voice_id, label, ...}. Returns an empty dict when the
+    file does not exist, is empty, has a non-mapping root, or has a missing,
+    empty, or non-mapping 'voices' key.
+    """
+    import sys  # local import: only needed for the stderr diagnostic
+    if not os.path.exists(path):
+        # Log to stderr, never stdout: when the MCP server runs over stdio,
+        # stdout carries the protocol messages and stray text corrupts them.
+        print(
+            f"[voices] voices.yaml not found at {path}. Using empty map.",
+            file=sys.stderr,
+        )
+        return {}
+    with open(path, "r", encoding="utf-8") as f:
+        data = yaml.safe_load(f)
+    # An empty file loads as None and a list-rooted file as a list; neither
+    # has a 'voices' mapping.
+    if not isinstance(data, dict):
+        return {}
+    # A bare 'voices:' line loads as None, which callers cannot iterate.
+    voices = data.get("voices")
+    return voices if isinstance(voices, dict) else {}
+def list_voices_data(voices_map: Dict[str, Dict[str, Any]]) -> List[Dict[str, Any]]:
+    """
+    Flatten the voices map into the list-of-dicts shape used by list_voices.
+    Each item has 'key', 'voice_id' (None when the entry has none) and
+    'label' (falls back to the key when the entry has no label).
+    """
+    rows: List[Dict[str, Any]] = []
+    for voice_key, info in voices_map.items():
+        rows.append(
+            {
+                "key": voice_key,
+                "voice_id": info.get("voice_id"),
+                "label": info.get("label", voice_key),
+            }
+        )
+    return rows
+def resolve_voice(
+    voice_param: str,
+    voices_map: Dict[str, Dict[str, Any]],
+) -> Dict[str, Any]:
+    """
+    Resolve a caller-supplied voice name to a voice key, ID and label.
+    voice_param is matched against the keys of voices_map in this order:
+      1. exact match (e.g. 'sarah');
+      2. match after trimming surrounding whitespace;
+      3. case-insensitive match (e.g. 'Sarah' or ' SARAH ' -> 'sarah').
+    If nothing matches, voice_param (trimmed) is treated as a raw voice_id.
+    Return:
+      {
+        "voice_key": matched key, or "raw" for a raw voice_id,
+        "voice_id": the entry's voice_id (None if the entry has none),
+        "voice_label": the entry's label, falling back to the key
+      }
+    """
+    wanted = voice_param.strip()
+    matched_key = None
+    if voice_param in voices_map:
+        matched_key = voice_param
+    elif wanted in voices_map:
+        matched_key = wanted
+    else:
+        # Callers often pass the display label ('Sarah') rather than the key
+        # ('sarah'); fall back to a case-insensitive comparison.
+        folded = wanted.casefold()
+        for key in voices_map:
+            if str(key).casefold() == folded:
+                matched_key = key
+                break
+    if matched_key is not None:
+        # A key that exists with an empty/None body is still a known key;
+        # report voice_id=None rather than sending the key as a voice_id.
+        entry = voices_map[matched_key] or {}
+        return {
+            "voice_key": matched_key,
+            "voice_id": entry.get("voice_id"),
+            "voice_label": entry.get("label", matched_key),
+        }
+    # Not in the voices map -> treat the value as a raw voice_id.
+    return {
+        "voice_key": "raw",
+        "voice_id": wanted,
+        "voice_label": wanted,
+    }

voices.yaml ADDED Viewed

	@@ -0,0 +1,58 @@

+voices:
+  charlie:
+    voice_id: IKne3meq5aSn9XLyUdCD
+    label: "Charlie"
+  george:
+    voice_id: JBFqnCBsd6RMkjVDRZzb
+    label: "George"
+  callum:
+    voice_id: N2lVS1w4EtoT3dr4eOWO
+    label: "Callum"
+  liam:
+    voice_id: TX3LPaxmHKxFdv7VOQHJ
+    label: "Liam"
+  will:
+    voice_id: bIHbv24MWmeRgasZH58o
+    label: "Will"
+  eric:
+    voice_id: cjVigY5qzO86Huf0OWal
+    label: "Eric"
+  chris:
+    voice_id: iP95p4xoKVk53GoZ742B
+    label: "Chris"
+  brian:
+    voice_id: nPczCjzI2devNBz1zQrb
+    label: "Brian"
+  daniel:
+    voice_id: onwK4e9ZLuTAKqWW03F9
+    label: "Daniel"
+  bill:
+    voice_id: pqHfZKP75CvOlQylNhV4
+    label: "Bill"
+  aria:
+    voice_id: 9BWtsMINqrJLrRacOk9x
+    label: "Aria"
+  sarah:
+    voice_id: EXAVITQu4vr4xnSDxMaL
+    label: "Sarah"
+  laura:
+    voice_id: FGY2WhTYpPnrIDTdsKH5
+    label: "Laura"
+  river:
+    voice_id: SAz9YHcvj6GT2YYXdXww
+    label: "River"
+  charlotte:
+    voice_id: XB0fDUnXU5powFXDhCwa
+    label: "Charlotte"
+  alice:
+    voice_id: Xb7hH8MSUJpSbSDYk0k2
+    label: "Alice"
+  matilda:
+    voice_id: XrExE9yKIg1WjnnlVkGX
+    label: "Matilda"
+  jessica:
+    voice_id: cgSgspJ2msm6clMCkdW9
+    label: "Jessica"
+  lily:
+    voice_id: pFZP5JQG7iQjIQuC4Bku
+    label: "Lily"