Nguyen Trung committed on
Commit
cee428a
·
1 Parent(s): 9e4d16b

Update server and requirements

Browse files
.DS_Store ADDED
Binary file (6.15 kB). View file
 
.env.example ADDED
@@ -0,0 +1 @@
 
 
1
+ ELEVENLABS_API_KEY=your_real_api_key_here
.gitignore ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ .env
2
+ __pycache__/
3
+ .venv
4
+
5
+ # macOS
6
+ .DS_Store
7
+
8
+ # outputs
9
+ outputs/
10
+ *.mp3
11
+ *.wav
12
+
.python-version ADDED
@@ -0,0 +1 @@
 
 
1
+ 3.11
__pycache__/tts_core.cpython-311.pyc ADDED
Binary file (4.39 kB). View file
 
__pycache__/voices.cpython-311.pyc ADDED
Binary file (2.87 kB). View file
 
pyproject.toml ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [project]
2
+ name = "mcp-tts"
3
+ version = "0.1.0"
4
+ description = "Add your description here"
5
+ readme = "README.md"
6
+ requires-python = ">=3.11"
7
+ dependencies = [
8
+ "elevenlabs>=2.25.0",
9
+ "fastmcp>=2.13.3",
10
+ "numpy>=2.3.5",
11
+ "python-dotenv>=1.2.1",
12
+ "pyyaml>=6.0.3",
13
+ ]
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ elevenlabs>=2.25.0
2
+ python-dotenv>=1.2.1
3
+ pyyaml>=6.0.3
4
+ numpy>=2.3.5
server.py ADDED
@@ -0,0 +1,150 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # server.py
2
+ from typing import Optional, List, Dict, Any
3
+ from urllib.parse import quote_plus
4
+
5
+ from fastmcp import FastMCP
6
+
7
+ from tts_core import get_eleven_client, ensure_output_dir, generate_and_save_audio
8
+ from voices import (
9
+ load_voices_map,
10
+ list_voices_data,
11
+ resolve_voice,
12
+ VOICES_MAP_PATH_DEFAULT,
13
+ )
14
+
15
+ ONLINE_UI_BASE = "https://trung06042002-mcptts.hf.space"
16
+
17
+ mcp = FastMCP("elevenlabs-tts")
18
+
19
+
20
@mcp.tool
def list_voices(
    voices_map_path: str = VOICES_MAP_PATH_DEFAULT,
) -> List[Dict[str, Any]]:
    """
    List the voices declared in the voices map file.

    Args:
        voices_map_path: Path of the YAML voices map to read.

    Returns:
        One dict per voice, as built by ``list_voices_data``:
        - key: the key used in code (e.g. 'sarah')
        - voice_id: the ElevenLabs voice identifier
        - label: the display name
    """
    return list_voices_data(load_voices_map(voices_map_path))
34
+
35
+
36
@mcp.tool
def generate_tts(
    text: str,
    voices: Optional[List[str]] = None,
    voice: Optional[str] = None,  # a single voice
    model_id: str = "eleven_turbo_v2",
    output_dir: str = "./outputs",
    output_format: str = "mp3_44100_128",
    language_code: Optional[str] = None,
    env_path: str = ".env",
    voices_map_path: str = VOICES_MAP_PATH_DEFAULT,
    stability: float = 0.3,
    similarity_boost: float = 0.7,
    style: float = 0.8,
    use_speaker_boost: bool = True,
    speed: Optional[float] = None,
) -> Dict[str, Any]:
    """
    Generate one or more TTS audio files from ``text``.

    Voice selection:
    - If neither 'voices' nor 'voice' is given, no audio is generated and
      the result is:
        {
            "status": "need_voice_selection",
            "message": "...",
            "available_voices": [...]
        }
    - If 'voices' (a non-empty list) is given, audio is generated for every
      entry; 'voice' is ignored in that case.
    - Otherwise, if 'voice' (a string) is given, audio is generated for that
      single voice.

    Every requested voice is resolved and validated before the ElevenLabs
    client is created, so an unresolvable voice fails the whole call up
    front instead of after some audio has already been generated.

    Returns:
        ``{"status": "ok", "audios": [...]}`` with one entry per requested
        voice (text, voice_key, voice_label, voice_id, model_id,
        output_format, audio_path, ui_url).

    Raises:
        ValueError: if a requested voice resolves to an empty voice_id.
    """
    voices_map = load_voices_map(voices_map_path)

    # Work out which voices were requested; 'voices' wins over 'voice'.
    requested: List[str] = []
    if voices:
        requested.extend(voices)
    elif voice:
        requested.append(voice)
    else:
        # Nothing specified -> ask the caller to pick a voice.
        return {
            "status": "need_voice_selection",
            "message": (
                "No voice was specified. Please choose one or more voices from "
                "'available_voices' and call generate_tts again with the 'voices' "
                "parameter (or 'voice' for a single voice)."
            ),
            "available_voices": list_voices_data(voices_map),
        }

    # Resolve and validate everything first: failing here costs nothing,
    # whereas failing mid-loop would discard audio that was already generated.
    resolved_voices: List[Dict[str, Any]] = []
    for v in requested:
        resolved = resolve_voice(v, voices_map)
        if not resolved["voice_id"]:
            voice_key = resolved["voice_key"]
            raise ValueError(
                f"Could not resolve voice '{voice_key}' to a valid voice_id."
            )
        resolved_voices.append(resolved)

    # Prepare the API client and the output directory.
    eleven = get_eleven_client(env_path)
    base_output_dir = ensure_output_dir(output_dir)

    voice_settings: Dict[str, Any] = {
        "stability": stability,
        "similarity_boost": similarity_boost,
        "style": style,
        "use_speaker_boost": use_speaker_boost,
    }
    if speed is not None:
        # Only sent when explicitly requested.
        voice_settings["speed"] = speed

    # The text is identical for every voice, so encode it once.
    encoded_text = quote_plus(text)

    results: List[Dict[str, Any]] = []
    for resolved in resolved_voices:
        voice_id = resolved["voice_id"]
        voice_key = resolved["voice_key"]
        voice_label = resolved["voice_label"]

        audio_path = generate_and_save_audio(
            eleven=eleven,
            text=text,
            voice_id=voice_id,
            model_id=model_id,
            output_dir=base_output_dir,
            output_format=output_format,
            language_code=language_code,
            voice_settings=voice_settings,
        )

        ui_url = (
            f"{ONLINE_UI_BASE}/"
            f"?text={encoded_text}"
            f"&voice={quote_plus(voice_key)}"
        )

        results.append(
            {
                "text": text,
                "voice_key": voice_key,
                "voice_label": voice_label,
                "voice_id": voice_id,
                "model_id": model_id,
                "output_format": output_format,
                "audio_path": audio_path,
                "ui_url": ui_url,
            }
        )

    return {
        "status": "ok",
        "audios": results,
    }
147
+
148
+
149
+ if __name__ == "__main__":
150
+ mcp.run()
tts_core.py ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # tts_core.py
2
+ import os
3
+ import re
4
+ from typing import Optional, Dict, Any
5
+
6
+ from dotenv import load_dotenv
7
+ from elevenlabs.client import ElevenLabs
8
+
9
+
10
def safe_filename(name: str) -> str:
    """Convert arbitrary text into a filesystem-safe ASCII name.

    The text is trimmed and lower-cased, accented letters are folded to
    their base ASCII letter (so "Xin chào" becomes "xin_chao" instead of
    silently losing the accented character), each run of whitespace becomes
    a single underscore, and every remaining character outside
    ``[a-z0-9_-]`` is removed.

    Returns:
        The sanitized name, or ``"audio"`` when nothing is left.
    """
    import unicodedata  # local import keeps the block self-contained

    # "đ" has no Unicode decomposition, so map it by hand before folding.
    name = name.strip().lower().replace("đ", "d")
    # NFKD splits accented letters into base letter + combining marks;
    # dropping the marks leaves the plain ASCII letter.
    decomposed = unicodedata.normalize("NFKD", name)
    name = "".join(ch for ch in decomposed if not unicodedata.combining(ch))
    name = re.sub(r"\s+", "_", name)
    name = re.sub(r"[^a-z0-9_\-]", "", name)
    return name or "audio"
16
+
17
+
18
def ensure_output_dir(base_output_dir: str) -> str:
    """
    Normalize the output directory and make sure it exists.

    Paths containing '/opt/ml/processing/' (the SageMaker processing
    layout) are used as given; any other path gets 'tts/elevenlabs'
    appended. The resulting directory is created if missing.

    Returns:
        The directory path that was created or already existed.
    """
    is_sagemaker_path = "/opt/ml/processing/" in base_output_dir
    target_dir = (
        base_output_dir
        if is_sagemaker_path
        else os.path.join(base_output_dir, "tts", "elevenlabs")
    )
    os.makedirs(target_dir, exist_ok=True)
    return target_dir
28
+
29
+
30
def get_eleven_client(env_path: str = ".env") -> ElevenLabs:
    """Load the .env file (when present) and build an ElevenLabs client.

    Args:
        env_path: Path of the dotenv file to load. When it does not exist,
            the current process environment is used unchanged.

    Returns:
        An ``ElevenLabs`` client built with ``ELEVENLABS_API_KEY``.

    Raises:
        ValueError: if ``ELEVENLABS_API_KEY`` is unset or empty.
    """
    import sys  # local import keeps the block self-contained

    if os.path.exists(env_path):
        load_dotenv(env_path)
    else:
        # Diagnostics go to stderr: when the MCP server runs over stdio,
        # stdout carries the protocol messages and must stay clean.
        print(
            f"[env] .env not found at {env_path}, using current environment vars.",
            file=sys.stderr,
        )

    api_key = os.getenv("ELEVENLABS_API_KEY")
    if not api_key:
        raise ValueError("Missing ELEVENLABS_API_KEY in environment or .env")

    return ElevenLabs(api_key=api_key)
42
+
43
+
44
def generate_and_save_audio(
    eleven: ElevenLabs,
    *,
    text: str,
    voice_id: str,
    model_id: str,
    output_dir: str,
    output_format: str,
    language_code: Optional[str],
    voice_settings: Dict[str, Any],
) -> str:
    """
    Call ElevenLabs text-to-speech and write the audio to a file.

    The file is written to
    ``<output_dir>/<safe_filename(text[:30])>/<voice_id>.<ext>``, where
    ``ext`` is the part of ``output_format`` before the first underscore
    (e.g. 'mp3' for 'mp3_44100_128'). The file is opened in 'wb' mode, so a
    previous file at the same path is overwritten.

    Returns:
        The path of the written audio file.
    """
    import sys  # local import keeps the block self-contained

    subfolder = safe_filename(text[:30])
    # partition() returns the whole string when there is no underscore.
    ext = output_format.partition("_")[0]
    filename = f"{voice_id}.{ext}"
    out_dir = os.path.join(output_dir, subfolder)
    os.makedirs(out_dir, exist_ok=True)
    output_path = os.path.join(out_dir, filename)

    # Progress messages go to stderr: when the MCP server runs over stdio,
    # stdout carries the protocol messages and must stay clean.
    print(
        f"[tts] Generating: voice={voice_id}, model={model_id}, format={output_format}",
        file=sys.stderr,
    )
    print(
        f"[tts] Text: {text[:80]}{'...' if len(text) > 80 else ''}",
        file=sys.stderr,
    )

    audio_stream = eleven.text_to_speech.convert(
        text=text,
        voice_id=voice_id,
        model_id=model_id,
        output_format=output_format,
        voice_settings=voice_settings,
        language_code=language_code,
        apply_text_normalization="auto",
        apply_language_text_normalization=False,
    )

    # The result is iterated chunk by chunk and appended to the file.
    with open(output_path, "wb") as f:
        for chunk in audio_stream:
            f.write(chunk)

    print(f"[tts] Audio saved to: {output_path}", file=sys.stderr)
    return output_path
uv.lock ADDED
The diff for this file is too large to render. See raw diff
 
voices.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # voices.py
2
+ import os
3
+ from typing import Dict, Any, List
4
+
5
+ import yaml
6
+
7
+ VOICES_MAP_PATH_DEFAULT = "voices.yaml"
8
+
9
+
10
def load_voices_map(path: str = VOICES_MAP_PATH_DEFAULT) -> Dict[str, Dict[str, Any]]:
    """Load the voices YAML file into ``{voice_key: {voice_id, label, ...}}``.

    Args:
        path: Location of the YAML file. Its top-level ``voices`` mapping
            is what gets returned.

    Returns:
        The ``voices`` mapping, or an empty dict when the file is missing,
        empty, or has no (or a null) ``voices`` entry.
    """
    import sys  # local import keeps the block self-contained

    if not os.path.exists(path):
        # Diagnostics go to stderr: when the MCP server runs over stdio,
        # stdout carries the protocol messages and must stay clean.
        print(
            f"[voices] voices.yaml not found at {path}. Using empty map.",
            file=sys.stderr,
        )
        return {}
    with open(path, "r", encoding="utf-8") as f:
        data = yaml.safe_load(f) or {}
    # A bare `voices:` key parses to None; normalize it to an empty map so
    # callers can always use .get()/.items() on the result.
    return data.get("voices") or {}
18
+
19
+
20
def list_voices_data(voices_map: Dict[str, Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Flatten the voices map into a list of simple dicts for ``list_voices``.

    Each item carries ``key``, ``voice_id`` (``None`` when the entry has
    none) and ``label`` (falling back to the key when the entry has none).
    """
    summaries: List[Dict[str, Any]] = []
    for voice_key, details in voices_map.items():
        summary = {
            "key": voice_key,
            "voice_id": details.get("voice_id"),
            "label": details.get("label", voice_key),
        }
        summaries.append(summary)
    return summaries
30
+
31
+
32
def resolve_voice(
    voice_param: str,
    voices_map: Dict[str, Dict[str, Any]],
) -> Dict[str, str]:
    """
    Resolve a requested voice into its key, id and label.

    voice_param may be:
    - a key of voices_map (e.g. 'sarah'), in which case the entry's
      voice_id and label are used (the label falls back to the key);
    - anything else, which is treated as a raw voice_id. A key whose
      entry is empty is handled the same way.

    Return:
        {
            "voice_key": ...,
            "voice_id": ...,
            "voice_label": ...
        }
    """
    known = voices_map.get(voice_param)

    if not known:
        # Not found in the map -> pass the value through as a raw voice_id.
        return {
            "voice_key": "raw",
            "voice_id": voice_param,
            "voice_label": voice_param,
        }

    display_name = known.get("label", voice_param)
    return {
        "voice_key": voice_param,
        "voice_id": known.get("voice_id"),
        "voice_label": display_name,
    }
voices.yaml ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ voices:
2
+ charlie:
3
+ voice_id: IKne3meq5aSn9XLyUdCD
4
+ label: "Charlie"
5
+ george:
6
+ voice_id: JBFqnCBsd6RMkjVDRZzb
7
+ label: "George"
8
+ callum:
9
+ voice_id: N2lVS1w4EtoT3dr4eOWO
10
+ label: "Callum"
11
+ liam:
12
+ voice_id: TX3LPaxmHKxFdv7VOQHJ
13
+ label: "Liam"
14
+ will:
15
+ voice_id: bIHbv24MWmeRgasZH58o
16
+ label: "Will"
17
+ eric:
18
+ voice_id: cjVigY5qzO86Huf0OWal
19
+ label: "Eric"
20
+ chris:
21
+ voice_id: iP95p4xoKVk53GoZ742B
22
+ label: "Chris"
23
+ brian:
24
+ voice_id: nPczCjzI2devNBz1zQrb
25
+ label: "Brian"
26
+ daniel:
27
+ voice_id: onwK4e9ZLuTAKqWW03F9
28
+ label: "Daniel"
29
+ bill:
30
+ voice_id: pqHfZKP75CvOlQylNhV4
31
+ label: "Bill"
32
+ aria:
33
+ voice_id: 9BWtsMINqrJLrRacOk9x
34
+ label: "Aria"
35
+ sarah:
36
+ voice_id: EXAVITQu4vr4xnSDxMaL
37
+ label: "Sarah"
38
+ laura:
39
+ voice_id: FGY2WhTYpPnrIDTdsKH5
40
+ label: "Laura"
41
+ river:
42
+ voice_id: SAz9YHcvj6GT2YYXdXww
43
+ label: "River"
44
+ charlotte:
45
+ voice_id: XB0fDUnXU5powFXDhCwa
46
+ label: "Charlotte"
47
+ alice:
48
+ voice_id: Xb7hH8MSUJpSbSDYk0k2
49
+ label: "Alice"
50
+ matilda:
51
+ voice_id: XrExE9yKIg1WjnnlVkGX
52
+ label: "Matilda"
53
+ jessica:
54
+ voice_id: cgSgspJ2msm6clMCkdW9
55
+ label: "Jessica"
56
+ lily:
57
+ voice_id: pFZP5JQG7iQjIQuC4Bku
58
+ label: "Lily"