Spaces:

trung06042002
/

mcptts

Sleeping

mcptts / server.py

Nguyen Trung

45acd11 about 1 month ago

9.41 kB

	# server.py
	from __future__ import annotations

	from typing import Optional, List, Dict, Any
	from pathlib import Path
	from urllib.parse import quote_plus
	from datetime import datetime
	import os
	import json
	import uuid
	import mimetypes

	from fastmcp import FastMCP
	from huggingface_hub import HfApi

	from tts_core import get_eleven_client, ensure_output_dir, generate_and_save_audio
	from voices import (
	load_voices_map,
	list_voices_data,
	resolve_voice,
	VOICES_MAP_PATH_DEFAULT,
	)

	# ====== Hugging Face Space config (where Gradio UI is running) ======
	# Base URL of the online UI; used below to build the file and UI links.
	ONLINE_UI_BASE = "https://trung06042002-mcptts.hf.space"

	# Your Space repo (matches the link above)
	HF_SPACE_REPO_ID = "trung06042002/mcptts"
	HF_REPO_TYPE = "space"

	# Folders inside the Space repo that hold audio + metadata
	HF_AUDIO_DIR = "audios"
	HF_META_DIR = "meta"

	# Name of the env var holding the upload token
	# (create the token under HF settings/tokens with write permission)
	HF_TOKEN_ENV = "HF_TOKEN"

	# MCP server instance; the tools below register themselves on it via @mcp.tool.
	mcp = FastMCP("elevenlabs-tts")


	def _require_hf_token() -> str:
	    """Return the Hugging Face token stored in the environment.

	    The variable name is taken from ``HF_TOKEN_ENV``.

	    Raises:
	        RuntimeError: if the variable is unset or empty.
	    """
	    value = os.environ.get(HF_TOKEN_ENV)
	    if value:
	        return value
	    raise RuntimeError(
	        f"Missing {HF_TOKEN_ENV}. Please export {HF_TOKEN_ENV}=<your_hf_write_token> "
	        "before running the MCP server."
	    )


	def _safe_stem(text: str, max_len: int = 40) -> str:
	s = "".join(c if c.isalnum() else "_" for c in text.strip().lower())
	s = "_".join([p for p in s.split("_") if p])
	return (s[:max_len] or "tts").rstrip("_")


	def _guess_ext_from_format(output_format: str) -> str:
	# output_format examples: "mp3_44100_128", "wav_44100"
	head = (output_format or "").split("_", 1)[0].lower()
	if head in {"mp3", "wav", "ogg", "flac", "m4a"}:
	return f".{head}"
	# fallback
	return ".mp3"


	def _upload_file_to_space(local_path: Path, path_in_repo: str, commit_message: str) -> None:
	    """Upload one local file to the configured Hugging Face Space repo.

	    Args:
	        local_path: File on disk to upload.
	        path_in_repo: Destination path inside the repo.
	        commit_message: Message for the commit created by the upload.

	    Raises:
	        RuntimeError: if the HF token is missing (raised by ``_require_hf_token``).
	    """
	    client = HfApi(token=_require_hf_token())
	    client.upload_file(
	        repo_id=HF_SPACE_REPO_ID,
	        repo_type=HF_REPO_TYPE,
	        path_in_repo=path_in_repo,
	        path_or_fileobj=str(local_path),
	        commit_message=commit_message,
	    )


	def upload_audio_and_meta(
	    audio_path: str,
	    *,
	    text: str,
	    voice_key: str,
	    voice_label: str,
	    voice_id: str,
	    model_id: str,
	    output_format: str,
	) -> Dict[str, str]:
	    """
	    Upload an audio file and a JSON metadata sidecar to the HF Space repo.

	    Args:
	        audio_path: Local path of the generated audio file.
	        text: Source text of the audio; used for the file name and metadata.
	        voice_key: Voice key; used (sanitised) in the file name and stored
	            verbatim in the metadata.
	        voice_label: Display name of the voice (metadata only).
	        voice_id: Voice id (metadata only).
	        model_id: Model id (metadata only).
	        output_format: Format string; used for the extension when the local
	            file has none, and stored in the metadata.

	    Returns:
	        A dict with:
	        - hf_audio_path: path inside the repo (e.g. audios/xxx.mp3)
	        - hf_meta_path: path inside the repo (e.g. meta/xxx.mp3.json)
	        - hf_audio_url: URL for reaching the file through hf.space
	        - ui_url: URL of the (Gradio) UI

	    Raises:
	        FileNotFoundError: if ``audio_path`` does not exist.
	        RuntimeError: if the HF token is missing (raised while uploading).
	    """
	    # Local import: the module itself only imports quote_plus.
	    from urllib.parse import quote

	    p = Path(audio_path)
	    if not p.exists():
	        raise FileNotFoundError(f"Audio file not found: {audio_path}")

	    # Build a unique file name so uploads never overwrite each other.
	    ext = p.suffix if p.suffix else _guess_ext_from_format(output_format)
	    safe = _safe_stem(text)
	    # voice_key ends up inside a repo path, so drop anything that could act
	    # as a path separator or otherwise break the path ("/", "..", spaces).
	    safe_voice = (
	        "".join(c if c.isalnum() or c in "-_" else "_" for c in voice_key)
	        or "voice"
	    )
	    ts = datetime.now().strftime("%Y%m%d_%H%M%S")
	    uid = uuid.uuid4().hex[:8]
	    filename = f"{safe}_{safe_voice}_{ts}_{uid}{ext}"

	    hf_audio_path = f"{HF_AUDIO_DIR}/{filename}"
	    hf_meta_path = f"{HF_META_DIR}/{filename}.json"

	    # Metadata stored next to the audio in the repo.
	    size_bytes = p.stat().st_size
	    mime = mimetypes.guess_type(filename)[0] or "application/octet-stream"
	    meta = {
	        "text": text,
	        "voice_key": voice_key,
	        "voice_label": voice_label,
	        "voice_id": voice_id,
	        "model_id": model_id,
	        "output_format": output_format,
	        "filename": filename,
	        "size_bytes": size_bytes,
	        "mime_type": mime,
	        "created_at": ts,
	    }

	    tmp_meta = p.with_suffix(p.suffix + ".json.tmp")
	    try:
	        tmp_meta.write_text(
	            json.dumps(meta, ensure_ascii=False, indent=2), encoding="utf-8"
	        )

	        # Upload the audio first, then its metadata.
	        _upload_file_to_space(
	            p,
	            hf_audio_path,
	            commit_message=f"Upload audio: {filename}",
	        )
	        _upload_file_to_space(
	            tmp_meta,
	            hf_meta_path,
	            commit_message=f"Upload meta: {filename}.json",
	        )
	    finally:
	        # Always remove the temp metadata file, even when an upload fails.
	        try:
	            tmp_meta.unlink()
	        except FileNotFoundError:
	            pass

	    # Path-based URL of the file on hf.space. The path is percent-encoded
	    # because the stem may contain non-ASCII letters.
	    # NOTE(review): assumes the Space serves repo files under
	    # /file/<path_in_repo> - confirm against the running app.
	    hf_audio_url = f"{ONLINE_UI_BASE}/file/{quote(hf_audio_path)}"
	    ui_url = ONLINE_UI_BASE  # the UI only lists/plays/downloads; no query params needed

	    return {
	        "hf_audio_path": hf_audio_path,
	        "hf_meta_path": hf_meta_path,
	        "hf_audio_url": hf_audio_url,
	        "ui_url": ui_url,
	    }


	@mcp.tool
	def list_voices(
	    voices_map_path: str = VOICES_MAP_PATH_DEFAULT,
	) -> List[Dict[str, Any]]:
	    """
	    List the voices available in voices.yaml.

	    Returns a list whose entries are described as:
	    - key: key used in code (e.g. 'sarah')
	    - voice_id: ElevenLabs voice id
	    - label: display name
	    """
	    return list_voices_data(load_voices_map(voices_map_path))


	@mcp.tool
	def generate_tts(
	    text: str,
	    voices: Optional[List[str]] = None,
	    voice: Optional[str] = None,  # a single voice
	    model_id: str = "eleven_turbo_v2",
	    output_dir: str = "./outputs",
	    output_format: str = "mp3_44100_128",
	    language_code: Optional[str] = None,
	    env_path: str = ".env",
	    voices_map_path: str = VOICES_MAP_PATH_DEFAULT,
	    stability: float = 0.3,
	    similarity_boost: float = 0.7,
	    style: float = 0.8,
	    use_speaker_boost: bool = True,
	    speed: Optional[float] = None,
	    upload_to_hf: bool = True,  # whether to upload the results to the HF Space
	) -> Dict[str, Any]:
	    """
	    Generate one or more TTS files from text and (optionally) upload them to
	    the Hugging Face Space so they can be played/downloaded online.

	    - If NEITHER 'voices' NOR 'voice' is given:
	      no audio is generated and the result is
	      {
	        "status": "need_voice_selection",
	        "available_voices": [...],
	        "message": "..."
	      }

	    - If 'voices' (list) is given -> generate for every listed voice.
	    - If 'voice' (string) is given -> generate for that single voice.
	    - If both are given, 'voices' takes precedence.

	    If upload_to_hf=True:
	    - the HF_TOKEN env var (write token) is required
	    - audio is uploaded to the Space repo: audios/<file>
	    - metadata is uploaded to the Space repo: meta/<file>.json
	    - each result additionally carries hf_audio_url + ui_url

	    Returns {"status": "ok", "audios": [...]} with one entry per voice.

	    Raises:
	        ValueError: if 'text' is empty or a voice cannot be resolved.
	        RuntimeError: if upload_to_hf is set and the HF token is missing.
	    """
	    voices_map = load_voices_map(voices_map_path)

	    # A bare string would otherwise be split into single characters by
	    # extend(); treat it as a one-element list.
	    if isinstance(voices, str):
	        voices = [voices]

	    # Work out which voices were requested.
	    requested: List[str] = []
	    if voices:
	        requested.extend(voices)
	    elif voice:
	        requested.append(voice)
	    else:
	        return {
	            "status": "need_voice_selection",
	            "message": (
	                "No voice was specified. Please choose one or more voices from "
	                "'available_voices' and call generate_tts again with the 'voices' "
	                "parameter (or 'voice' for a single voice)."
	            ),
	            "available_voices": list_voices_data(voices_map),
	        }

	    if not text or not text.strip():
	        raise ValueError("'text' must be a non-empty string.")

	    # Prepare the client and the local output directory.
	    eleven = get_eleven_client(env_path)
	    base_output_dir = ensure_output_dir(output_dir)

	    # Fail fast on a missing token before any audio is generated. This runs
	    # after get_eleven_client() in case that call loads env_path into the
	    # environment.
	    if upload_to_hf:
	        _require_hf_token()

	    # Resolve every voice up front so an invalid entry aborts the call before
	    # audio has been generated or uploaded for the earlier ones.
	    resolved_voices: List[Dict[str, Any]] = []
	    for v in requested:
	        resolved = resolve_voice(v, voices_map)
	        voice_key = resolved["voice_key"]
	        if not resolved["voice_id"]:
	            raise ValueError(
	                f"Could not resolve voice '{voice_key}' to a valid voice_id."
	            )
	        resolved_voices.append(resolved)

	    voice_settings = {
	        "stability": stability,
	        "similarity_boost": similarity_boost,
	        "style": style,
	        "use_speaker_boost": use_speaker_boost,
	    }
	    if speed is not None:
	        voice_settings["speed"] = speed

	    results: List[Dict[str, Any]] = []

	    for resolved in resolved_voices:
	        voice_id = resolved["voice_id"]
	        voice_key = resolved["voice_key"]
	        voice_label = resolved["voice_label"]

	        audio_path = generate_and_save_audio(
	            eleven=eleven,
	            text=text,
	            voice_id=voice_id,
	            model_id=model_id,
	            output_dir=base_output_dir,
	            output_format=output_format,
	            language_code=language_code,
	            voice_settings=voice_settings,
	        )

	        item: Dict[str, Any] = {
	            "text": text,
	            "voice_key": voice_key,
	            "voice_label": voice_label,
	            "voice_id": voice_id,
	            "model_id": model_id,
	            "output_format": output_format,
	            "audio_path": audio_path,  # local path
	            "ui_url": ONLINE_UI_BASE,  # online UI (list/play/download)
	        }

	        if upload_to_hf:
	            uploaded = upload_audio_and_meta(
	                audio_path,
	                text=text,
	                voice_key=voice_key,
	                voice_label=voice_label,
	                voice_id=voice_id,
	                model_id=model_id,
	                output_format=output_format,
	            )
	            item.update(uploaded)

	        # Optional: keep a query-param link for easy sharing, even though the
	        # UI itself does not need it.
	        item["ui_url_with_params"] = (
	            f"{ONLINE_UI_BASE}/"
	            f"?text={quote_plus(text)}"
	            f"&voice={quote_plus(voice_key)}"
	        )

	        results.append(item)

	    return {
	        "status": "ok",
	        "audios": results,
	    }


	if __name__ == "__main__":
	    # Recommendation: do not print anything extra to stdout, so the
	    # Claude Desktop (STDIO) connection stays stable.
	    mcp.run()