# NOTE: hosting-page residue captured together with the file (not part of app.py):
#   Voxxium's picture / Update app.py / 9e5940d verified
from fastapi import FastAPI, Request
from fastapi.responses import StreamingResponse, JSONResponse
import edge_tts
import io
import asyncio
# ASGI application object; the route decorators below register handlers on it.
app = FastAPI(title="Edge TTS OpenAI Compatible API - Multilingual")
# ============================================================
# VOICE MAP: OpenAI-compatible names -> Edge TTS voices
# AnythingLLM sends the name of the "model" as the "voice" field
# ============================================================
# Keys are the short aliases exposed by /v1/models and /v1/voices; values are
# the voice identifiers handed to edge_tts.Communicate.
# NOTE(review): the identifiers are not validated at start-up — compare them
# with the output of /v1/voices/all to confirm each one actually exists.
VOICE_MAP = {
    # ---------- FRENCH ----------
    "fr-denise": "fr-FR-DeniseNeural", # French (France), female (natural)
    "fr-henri": "fr-FR-HenriNeural", # French (France), male
    "fr-eloise": "fr-FR-EloiseNeural", # French (France), female (young)
    "fr-ca-sylvie": "fr-CA-SylvieNeural", # French (Canada), female
    "fr-ca-antoine": "fr-CA-AntoineNeural", # French (Canada), male
    "fr-be-charline": "fr-BE-CharlineNeural", # French (Belgium), female
    "fr-be-gerard": "fr-BE-GerardNeural", # French (Belgium), male
    "fr-ch-ariane": "fr-CH-ArianeNeural", # French (Switzerland), female
    "fr-ch-fabrice": "fr-CH-FabriceNeural", # French (Switzerland), male
    # ---------- ENGLISH ----------
    # The first six aliases reuse the voice names of the OpenAI TTS API.
    "alloy": "en-US-JennyNeural", # English (US), female
    "echo": "en-US-GuyNeural", # English (US), male
    "fable": "en-GB-SoniaNeural", # English (UK), female
    "onyx": "en-US-ChristopherNeural", # English (US), male (deep)
    "nova": "en-US-AriaNeural", # English (US), female
    "shimmer": "en-US-AnaNeural", # English (US), female (young)
    "en-jenny": "en-US-JennyNeural",
    "en-guy": "en-US-GuyNeural",
    "en-aria": "en-US-AriaNeural",
    # NOTE(review): Davis and Tony may be Azure-only voices — confirm they are
    # served by the Edge endpoint.
    "en-davis": "en-US-DavisNeural",
    "en-tony": "en-US-TonyNeural",
    # ---------- MULTILINGUAL (these voices speak SEVERAL languages) ----------
    # NOTE(review): confirm the Jenny/Ryan multilingual voices are available
    # through Edge TTS.
    "multi-jenny": "en-US-JennyMultilingualNeural", # multilingual
    "multi-ryan": "en-US-RyanMultilingualNeural", # multilingual
    "multi-ava": "en-US-AvaMultilingualNeural", # multilingual
    "multi-andrew": "en-US-AndrewMultilingualNeural", # multilingual
    "multi-emma": "en-US-EmmaMultilingualNeural", # multilingual
    "multi-brian": "en-US-BrianMultilingualNeural", # multilingual
    "multi-vivienne": "fr-FR-VivienneMultilingualNeural", # French, multilingual
    "multi-remy": "fr-FR-RemyMultilingualNeural", # French, multilingual
    # ---------- SPANISH ----------
    "es-elvira": "es-ES-ElviraNeural",
    "es-alvaro": "es-ES-AlvaroNeural",
    # ---------- GERMAN ----------
    "de-katja": "de-DE-KatjaNeural",
    "de-conrad": "de-DE-ConradNeural",
    # ---------- ITALIAN ----------
    "it-elsa": "it-IT-ElsaNeural",
    "it-diego": "it-IT-DiegoNeural",
    # ---------- PORTUGUESE ----------
    "pt-francisca": "pt-BR-FranciscaNeural",
    "pt-antonio": "pt-BR-AntonioNeural",
    # ---------- JAPANESE ----------
    "ja-nanami": "ja-JP-NanamiNeural",
    "ja-keita": "ja-JP-KeitaNeural",
    # ---------- CHINESE ----------
    "zh-xiaoxiao": "zh-CN-XiaoxiaoNeural",
    "zh-yunyang": "zh-CN-YunyangNeural",
    # ---------- ARABIC ----------
    # NOTE(review): "Salma" is believed to be an ar-EG voice (ar-EG-SalmaNeural),
    # not ar-SA — verify before relying on this alias.
    "ar-salma": "ar-SA-SalmaNeural",
    "ar-hamed": "ar-SA-HamedNeural",
}
@app.get("/")
async def root():
    """Return a short service banner.

    The payload carries a fixed status/message pair, the number of aliases
    currently defined in ``VOICE_MAP`` and a hard-coded list of locale tags.
    """
    # Static list: it is not derived from VOICE_MAP.
    advertised_locales = [
        "fr-FR", "fr-CA", "fr-BE", "fr-CH",
        "en-US", "en-GB", "es-ES", "de-DE",
        "it-IT", "pt-BR", "ja-JP", "zh-CN", "ar-SA",
        "multilingual",
    ]
    return dict(
        status="ok",
        message="Edge TTS API - OpenAI Compatible - Multilingual",
        voices_count=len(VOICE_MAP),
        languages=advertised_locales,
    )
# ============================================================
# /v1/models — AnythingLLM calls this endpoint to list
# the voices available in its dropdown
# ============================================================
@app.get("/v1/models")
async def models():
    """Expose every voice alias as an OpenAI-style "model" entry.

    Each entry uses the alias as ``id`` and the mapped Edge TTS voice name as
    ``description``; the entries are wrapped in a ``{"object": "list"}``
    envelope.
    """
    entries = [
        {
            "id": alias,
            "object": "model",
            "owned_by": "edge-tts",
            "description": target_voice,
        }
        for alias, target_voice in VOICE_MAP.items()
    ]
    return {"object": "list", "data": entries}
# ============================================================
# /v1/audio/speech — Main OpenAI-compatible TTS endpoint
# ============================================================
def _speed_to_rate(speed: float) -> str:
    """Convert a speed multiplier (1.0 = normal) into a signed percent string.

    The percentage is rounded rather than truncated: ``(0.9 - 1) * 100`` is
    ``-9.999...`` in floating point and must become ``-10%``, not ``-9%``.
    The sign is always emitted (``+0%``, ``+25%``, ``-10%``) because edge-tts
    expects an explicit ``+``/``-`` prefix on the rate.

    Raises:
        ValueError: if ``speed`` is NaN.
        OverflowError: if ``speed`` is infinite.
    """
    percent = round((speed - 1) * 100)
    return f"{percent:+d}%"


def _bad_request(message: str) -> JSONResponse:
    """Build a 400 JSON error response carrying ``message``."""
    return JSONResponse(status_code=400, content={"error": message})


@app.post("/v1/audio/speech")
async def speech(request: Request):
    """Synthesize speech from an OpenAI-style ``/v1/audio/speech`` request.

    Expected JSON body fields:
        input: text to speak (required, non-blank string).
        voice: alias from ``VOICE_MAP`` or a raw Edge TTS voice name;
            defaults to ``"fr-denise"`` when missing, null or empty.
        speed: playback multiplier, 1.0 = normal; defaults to 1.0.
        response_format: accepted for OpenAI compatibility but ignored —
            the audio produced by edge-tts is MP3, so the response is always
            labelled ``audio/mpeg`` instead of advertising a format the bytes
            do not have.

    Returns:
        The MP3 audio on success; a JSON ``{"error": ...}`` body with status
        400 for invalid client input, or 500 when synthesis itself fails.
    """
    # --- Request parsing: client mistakes are reported as 400, not 500 ---
    try:
        data = await request.json()
    except ValueError:
        # json.JSONDecodeError and UnicodeDecodeError both derive from ValueError.
        return _bad_request("Le corps de la requête doit être un objet JSON valide")
    if not isinstance(data, dict):
        return _bad_request("Le corps de la requête doit être un objet JSON valide")

    text = data.get("input", "")
    if not text or (isinstance(text, str) and not text.strip()):
        return _bad_request("Le champ 'input' est vide")
    if not isinstance(text, str):
        return _bad_request("Le champ 'input' doit être une chaîne de caractères")

    # Default voice is French; an explicit null/empty value also falls back to it.
    voice = data.get("voice") or "fr-denise"
    if not isinstance(voice, str):
        return _bad_request("Le champ 'voice' doit être une chaîne de caractères")
    # Resolve the voice: either an alias from the map or a raw Edge TTS name.
    edge_voice = VOICE_MAP.get(voice, voice)

    raw_speed = data.get("speed")
    if raw_speed is None:
        raw_speed = 1.0
    try:
        rate = _speed_to_rate(float(raw_speed))
    except (TypeError, ValueError, OverflowError):
        return _bad_request("Le champ 'speed' doit être un nombre fini")

    # --- Audio generation: buffered fully so a failure can still yield a 500 ---
    try:
        communicate = edge_tts.Communicate(text, edge_voice, rate=rate)
        audio_chunks = [
            chunk["data"]
            async for chunk in communicate.stream()
            if chunk["type"] == "audio"
        ]
    except Exception as e:
        # Top-level boundary for upstream failures (network, unknown voice, ...).
        return JSONResponse(
            status_code=500,
            content={"error": str(e)}
        )

    # Send the audio as one chunk; iterating a BytesIO would instead split the
    # binary payload at every newline byte.
    audio_bytes = b"".join(audio_chunks)
    return StreamingResponse(iter([audio_bytes]), media_type="audio/mpeg")
# ============================================================
# /v1/voices — Bonus endpoint listing the voices with details
# ============================================================
@app.get("/v1/voices")
async def list_voices():
    """List every alias with its Edge TTS voice, language tag and multilingual flag."""

    def _language_tag(edge_name):
        # The language is the first two dash-separated parts of the Edge TTS
        # name (e.g. "fr-FR"); names with fewer parts are reported as "unknown".
        pieces = edge_name.split("-")
        if len(pieces) < 2:
            return "unknown"
        return f"{pieces[0]}-{pieces[1]}"

    catalogue = [
        {
            "alias": alias,
            "edge_voice": edge_name,
            "language": _language_tag(edge_name),
            "multilingual": "Multilingual" in edge_name,
        }
        for alias, edge_name in VOICE_MAP.items()
    ]
    return {"voices": catalogue}
# ============================================================
# /v1/voices/all — Lists ALL available Edge TTS voices
# ============================================================
@app.get("/v1/voices/all")
async def all_edge_voices():
    """Fetch the voice list from ``edge_tts.list_voices()`` at request time.

    Returns the list together with its length, or a 500 JSON error carrying
    the exception text if the lookup fails.
    """
    try:
        catalogue = await edge_tts.list_voices()
        payload = {"count": len(catalogue), "voices": catalogue}
    except Exception as exc:
        return JSONResponse(content={"error": str(exc)}, status_code=500)
    return payload
# ============================================================
# /v1/voices/language/{lang} — Filter by language
# ============================================================
@app.get("/v1/voices/language/{lang}")
async def voices_by_language(lang: str):
    """Return the Edge TTS voices whose locale starts with ``lang``.

    The comparison is a case-insensitive prefix match on each voice's
    ``"Locale"`` field, so both ``/v1/voices/language/fr`` and
    ``/v1/voices/language/fr-FR`` are valid. Any failure while fetching or
    filtering the list is reported as a 500 JSON error.
    """
    try:
        wanted_prefix = lang.lower()
        matches = []
        for entry in await edge_tts.list_voices():
            if entry["Locale"].lower().startswith(wanted_prefix):
                matches.append(entry)
    except Exception as exc:
        return JSONResponse(content={"error": str(exc)}, status_code=500)
    return {"language": lang, "count": len(matches), "voices": matches}
@app.get("/health")
async def health():
    """Health probe: report a fixed status and the number of configured aliases."""
    alias_count = len(VOICE_MAP)
    return {"status": "healthy", "voices_loaded": alias_count}