"""OpenAI-compatible text-to-speech HTTP API backed by Edge TTS."""

import asyncio
import io
import math

import edge_tts
from fastapi import FastAPI, Request
from fastapi.responses import StreamingResponse, JSONResponse

app = FastAPI(title="Edge TTS OpenAI Compatible API - Multilingual")

# ============================================================
# VOICE MAP: OpenAI-compatible names -> Edge TTS voices
# AnythingLLM sends the "model" name as the "voice" field
# ============================================================
VOICE_MAP = {
    # ---------- FRENCH ----------
    "fr-denise": "fr-FR-DeniseNeural",        # FR female (natural)
    "fr-henri": "fr-FR-HenriNeural",          # FR male
    "fr-eloise": "fr-FR-EloiseNeural",        # FR female (young)
    "fr-ca-sylvie": "fr-CA-SylvieNeural",     # FR Canada female
    "fr-ca-antoine": "fr-CA-AntoineNeural",   # FR Canada male
    "fr-be-charline": "fr-BE-CharlineNeural", # FR Belgium female
    "fr-be-gerard": "fr-BE-GerardNeural",     # FR Belgium male
    "fr-ch-ariane": "fr-CH-ArianeNeural",     # FR Switzerland female
    "fr-ch-fabrice": "fr-CH-FabriceNeural",   # FR Switzerland male
    # ---------- ENGLISH ----------
    "alloy": "en-US-JennyNeural",             # EN-US female
    "echo": "en-US-GuyNeural",                # EN-US male
    "fable": "en-GB-SoniaNeural",             # EN-GB female
    "onyx": "en-US-ChristopherNeural",        # EN-US male (deep)
    "nova": "en-US-AriaNeural",               # EN-US female
    "shimmer": "en-US-AnaNeural",             # EN-US female (young)
    "en-jenny": "en-US-JennyNeural",
    "en-guy": "en-US-GuyNeural",
    "en-aria": "en-US-AriaNeural",
    "en-davis": "en-US-DavisNeural",
    "en-tony": "en-US-TonyNeural",
    # ---------- MULTILINGUAL (these voices speak SEVERAL languages) ----------
    "multi-jenny": "en-US-JennyMultilingualNeural",      # multilingual
    "multi-ryan": "en-US-RyanMultilingualNeural",        # multilingual
    "multi-ava": "en-US-AvaMultilingualNeural",          # multilingual
    "multi-andrew": "en-US-AndrewMultilingualNeural",    # multilingual
    "multi-emma": "en-US-EmmaMultilingualNeural",        # multilingual
    "multi-brian": "en-US-BrianMultilingualNeural",      # multilingual
    "multi-vivienne": "fr-FR-VivienneMultilingualNeural",  # FR multilingual
    "multi-remy": "fr-FR-RemyMultilingualNeural",        # FR multilingual
    # ---------- SPANISH ----------
    "es-elvira": "es-ES-ElviraNeural",
    "es-alvaro": "es-ES-AlvaroNeural",
    # ---------- GERMAN ----------
    "de-katja": "de-DE-KatjaNeural",
    "de-conrad": "de-DE-ConradNeural",
    # ---------- ITALIAN ----------
    "it-elsa": "it-IT-ElsaNeural",
    "it-diego": "it-IT-DiegoNeural",
    # ---------- PORTUGUESE ----------
    "pt-francisca": "pt-BR-FranciscaNeural",
    "pt-antonio": "pt-BR-AntonioNeural",
    # ---------- JAPANESE ----------
    "ja-nanami": "ja-JP-NanamiNeural",
    "ja-keita": "ja-JP-KeitaNeural",
    # ---------- CHINESE ----------
    "zh-xiaoxiao": "zh-CN-XiaoxiaoNeural",
    "zh-yunyang": "zh-CN-YunyangNeural",
    # ---------- ARABIC ----------
    "ar-salma": "ar-SA-SalmaNeural",
    "ar-hamed": "ar-SA-HamedNeural",
}

# Alias used when the request does not name a voice (French by default).
_DEFAULT_VOICE = "fr-denise"

# Content-Type advertised for each OpenAI `response_format` value.
# NOTE(review): the synthesized bytes are passed through unchanged whatever
# format is requested; presumably Edge TTS always emits MP3, in which case the
# non-mp3 entries mislabel the payload -- confirm before relying on them.
_CONTENT_TYPES = {
    "mp3": "audio/mpeg",
    "opus": "audio/opus",
    "aac": "audio/aac",
    "flac": "audio/flac",
    "wav": "audio/wav",
}
_DEFAULT_CONTENT_TYPE = "audio/mpeg"


class _BadRequest(Exception):
    """Raised when the client payload is invalid; reported as HTTP 400."""


def _error_response(status_code: int, message: str) -> JSONResponse:
    """Build the JSON error body shared by every endpoint."""
    return JSONResponse(status_code=status_code, content={"error": message})


def _speed_to_rate(speed: float) -> str:
    """Convert a speed multiplier into a signed percentage rate string.

    Examples: 1.0 -> "+0%", 1.5 -> "+50%", 0.9 -> "-10%".

    Raises:
        ValueError: if *speed* is not a finite, strictly positive number.
    """
    if not math.isfinite(speed) or speed <= 0:
        raise ValueError(f"invalid speed: {speed!r}")
    # round() rather than int(): (0.9 - 1) * 100 is -9.999999999999998 in
    # floating point, which int() would truncate to -9 instead of -10.
    # The "+d" format always emits a sign, including for zero ("+0%").
    return f"{round((speed - 1) * 100):+d}%"


async def _read_json_object(request: Request):
    """Return the request body parsed as JSON, or raise _BadRequest."""
    try:
        return await request.json()
    except ValueError:
        # json.JSONDecodeError and UnicodeDecodeError both derive from
        # ValueError, so malformed bodies of either sort land here.
        raise _BadRequest("Le corps de la requête n'est pas un JSON valide") from None


def _parse_speech_payload(payload) -> tuple:
    """Validate a speech payload and return (text, edge_voice, rate, media_type).

    Raises:
        _BadRequest: if the payload is not a JSON object or a field is invalid.
    """
    if not isinstance(payload, dict):
        raise _BadRequest("Le corps de la requête doit être un objet JSON")

    text = payload.get("input", "")
    if text is not None and not isinstance(text, str):
        raise _BadRequest("Le champ 'input' doit être une chaîne de caractères")
    if not text or not text.strip():
        raise _BadRequest("Le champ 'input' est vide")

    # A missing, null or empty voice falls back to the default alias.
    voice = payload.get("voice") or _DEFAULT_VOICE
    if not isinstance(voice, str):
        raise _BadRequest("Le champ 'voice' doit être une chaîne de caractères")
    # Resolve the voice: either an alias from the map or a raw Edge TTS name.
    edge_voice = VOICE_MAP.get(voice, voice)

    raw_speed = payload.get("speed")
    try:
        rate = _speed_to_rate(1.0 if raw_speed is None else float(raw_speed))
    except (TypeError, ValueError, OverflowError):
        raise _BadRequest(
            "Le champ 'speed' doit être un nombre strictement positif"
        ) from None

    response_format = payload.get("response_format", "mp3")
    # Non-string values (possibly unhashable) cannot be looked up in the map.
    if isinstance(response_format, str):
        media_type = _CONTENT_TYPES.get(response_format, _DEFAULT_CONTENT_TYPE)
    else:
        media_type = _DEFAULT_CONTENT_TYPE

    return text, edge_voice, rate, media_type


async def _synthesize(text: str, edge_voice: str, rate: str) -> bytes:
    """Run Edge TTS and return the concatenated audio chunks."""
    communicate = edge_tts.Communicate(text, edge_voice, rate=rate)
    buffer = io.BytesIO()
    async for chunk in communicate.stream():
        # Only "audio" chunks carry payload bytes; other chunk types are skipped.
        if chunk["type"] == "audio":
            buffer.write(chunk["data"])
    return buffer.getvalue()


@app.get("/")
async def root():
    """Return a short status summary of the service."""
    return {
        "status": "ok",
        "message": "Edge TTS API - OpenAI Compatible - Multilingual",
        "voices_count": len(VOICE_MAP),
        "languages": ["fr-FR", "fr-CA", "fr-BE", "fr-CH", "en-US", "en-GB",
                      "es-ES", "de-DE", "it-IT", "pt-BR", "ja-JP", "zh-CN",
                      "ar-SA", "multilingual"]
    }


# ============================================================
# /v1/models -- AnythingLLM calls this endpoint to list the
# voices available in its dropdown
# ============================================================
@app.get("/v1/models")
async def models():
    """List every voice alias as an OpenAI-style model entry."""
    model_list = [
        {
            "id": name,
            "object": "model",
            "owned_by": "edge-tts",
            "description": edge_voice,
        }
        for name, edge_voice in VOICE_MAP.items()
    ]
    return {"object": "list", "data": model_list}


# ============================================================
# /v1/audio/speech -- Main OpenAI TTS compatible endpoint
# ============================================================
@app.post("/v1/audio/speech")
async def speech(request: Request):
    """Synthesize the `input` text of the JSON body and return the audio.

    Body fields read: `input` (required text), `voice` (alias from VOICE_MAP
    or a raw Edge TTS voice name), `speed` (multiplier, default 1.0) and
    `response_format` (only selects the response Content-Type).

    Returns the audio as a streaming response, a 400 JSON error for an
    invalid payload, or a 500 JSON error if anything else fails.
    """
    try:
        payload = await _read_json_object(request)
        text, edge_voice, rate, media_type = _parse_speech_payload(payload)
        audio = await _synthesize(text, edge_voice, rate)
    except _BadRequest as exc:
        return _error_response(400, str(exc))
    except Exception as exc:
        # Top-level boundary: report any other failure to the client as JSON.
        return _error_response(500, str(exc))
    return StreamingResponse(io.BytesIO(audio), media_type=media_type)


# ============================================================
# /v1/voices -- Bonus endpoint listing the voices with details
# ============================================================
@app.get("/v1/voices")
async def list_voices():
    """List every alias of VOICE_MAP with its language and multilingual flag."""

    def _language_of(edge_name: str) -> str:
        # The language is the first two dash-separated parts of the Edge TTS
        # name, e.g. "fr-FR-DeniseNeural" -> "fr-FR".
        parts = edge_name.split("-")
        return f"{parts[0]}-{parts[1]}" if len(parts) >= 2 else "unknown"

    voices = [
        {
            "alias": alias,
            "edge_voice": edge_name,
            "language": _language_of(edge_name),
            "multilingual": "Multilingual" in edge_name,
        }
        for alias, edge_name in VOICE_MAP.items()
    ]
    return {"voices": voices}


# ============================================================
# /v1/voices/all -- List ALL available Edge TTS voices
# ============================================================
@app.get("/v1/voices/all")
async def all_edge_voices():
    """Fetch the voice list from Edge TTS at request time."""
    try:
        voices = await edge_tts.list_voices()
        return {
            "count": len(voices),
            "voices": voices
        }
    except Exception as e:
        return _error_response(500, str(e))


# ============================================================
# /v1/voices/language/{lang} -- Filter by language
# ============================================================
@app.get("/v1/voices/language/{lang}")
async def voices_by_language(lang: str):
    """
    Filter the voices by language (case-insensitive prefix of "Locale").
    Examples: /v1/voices/language/fr or /v1/voices/language/fr-FR
    """
    try:
        all_voices = await edge_tts.list_voices()
        prefix = lang.lower()
        filtered = [
            v for v in all_voices
            if v["Locale"].lower().startswith(prefix)
        ]
        return {
            "language": lang,
            "count": len(filtered),
            "voices": filtered
        }
    except Exception as e:
        return _error_response(500, str(e))


@app.get("/health")
async def health():
    """Liveness probe reporting the number of configured voice aliases."""
    return {"status": "healthy", "voices_loaded": len(VOICE_MAP)}