# ============================================================
# Edge TTS — OpenAI-compatible TTS API (Hugging Face Space)
# ============================================================
# Standard library
import asyncio
import io

# Third-party
import edge_tts
from fastapi import FastAPI, Request
from fastapi.responses import JSONResponse, StreamingResponse

app = FastAPI(title="Edge TTS OpenAI Compatible API - Multilingual")
# ============================================================
# VOICE MAP: OpenAI-compatible names -> Edge TTS voice names.
# AnythingLLM sends the "model" name in the "voice" field, so the
# keys below are what clients select; the values are the real
# Edge TTS neural voice identifiers.
# NOTE: entry order matters — /v1/models lists voices in this order.
# ============================================================
VOICE_MAP = {
    # ---------- FRENCH ----------
    "fr-denise": "fr-FR-DeniseNeural",        # FR female (natural)
    "fr-henri": "fr-FR-HenriNeural",          # FR male
    "fr-eloise": "fr-FR-EloiseNeural",        # FR female (young)
    "fr-ca-sylvie": "fr-CA-SylvieNeural",     # FR Canada female
    "fr-ca-antoine": "fr-CA-AntoineNeural",   # FR Canada male
    "fr-be-charline": "fr-BE-CharlineNeural", # FR Belgium female
    "fr-be-gerard": "fr-BE-GerardNeural",     # FR Belgium male
    "fr-ch-ariane": "fr-CH-ArianeNeural",     # FR Switzerland female
    "fr-ch-fabrice": "fr-CH-FabriceNeural",   # FR Switzerland male
    # ---------- ENGLISH ----------
    # The six OpenAI voice names (alloy/echo/...) are mapped so that
    # clients hard-coded to OpenAI's TTS API work out of the box.
    "alloy": "en-US-JennyNeural",             # EN-US female
    "echo": "en-US-GuyNeural",                # EN-US male
    "fable": "en-GB-SoniaNeural",             # EN-GB female
    "onyx": "en-US-ChristopherNeural",        # EN-US male (deep)
    "nova": "en-US-AriaNeural",               # EN-US female
    "shimmer": "en-US-AnaNeural",             # EN-US female (young)
    "en-jenny": "en-US-JennyNeural",
    "en-guy": "en-US-GuyNeural",
    "en-aria": "en-US-AriaNeural",
    "en-davis": "en-US-DavisNeural",
    "en-tony": "en-US-TonyNeural",
    # ---------- MULTILINGUAL (these voices speak SEVERAL languages) ----------
    "multi-jenny": "en-US-JennyMultilingualNeural",        # ⭐ Multilingual
    "multi-ryan": "en-US-RyanMultilingualNeural",          # ⭐ Multilingual
    "multi-ava": "en-US-AvaMultilingualNeural",            # ⭐ Multilingual
    "multi-andrew": "en-US-AndrewMultilingualNeural",      # ⭐ Multilingual
    "multi-emma": "en-US-EmmaMultilingualNeural",          # ⭐ Multilingual
    "multi-brian": "en-US-BrianMultilingualNeural",        # ⭐ Multilingual
    "multi-vivienne": "fr-FR-VivienneMultilingualNeural",  # ⭐ FR Multilingual
    "multi-remy": "fr-FR-RemyMultilingualNeural",          # ⭐ FR Multilingual
    # ---------- SPANISH ----------
    "es-elvira": "es-ES-ElviraNeural",
    "es-alvaro": "es-ES-AlvaroNeural",
    # ---------- GERMAN ----------
    "de-katja": "de-DE-KatjaNeural",
    "de-conrad": "de-DE-ConradNeural",
    # ---------- ITALIAN ----------
    "it-elsa": "it-IT-ElsaNeural",
    "it-diego": "it-IT-DiegoNeural",
    # ---------- PORTUGUESE ----------
    "pt-francisca": "pt-BR-FranciscaNeural",
    "pt-antonio": "pt-BR-AntonioNeural",
    # ---------- JAPANESE ----------
    "ja-nanami": "ja-JP-NanamiNeural",
    "ja-keita": "ja-JP-KeitaNeural",
    # ---------- CHINESE ----------
    "zh-xiaoxiao": "zh-CN-XiaoxiaoNeural",
    "zh-yunyang": "zh-CN-YunyangNeural",
    # ---------- ARABIC ----------
    "ar-salma": "ar-SA-SalmaNeural",
    "ar-hamed": "ar-SA-HamedNeural",
}
@app.get("/")
async def root():
    """Service banner: status, number of mapped voices, covered languages.

    Fix: the route decorator was missing, so the endpoint was never
    registered with FastAPI.
    """
    return {
        "status": "ok",
        "message": "Edge TTS API - OpenAI Compatible - Multilingual",
        "voices_count": len(VOICE_MAP),
        "languages": ["fr-FR", "fr-CA", "fr-BE", "fr-CH",
                      "en-US", "en-GB", "es-ES", "de-DE",
                      "it-IT", "pt-BR", "ja-JP", "zh-CN", "ar-SA",
                      "multilingual"]
    }
# ============================================================
# /v1/models — AnythingLLM calls this endpoint to list the
# voices shown in its dropdown
# ============================================================
@app.get("/v1/models")
async def models():
    """Return the voice aliases as an OpenAI-style model list.

    Each VOICE_MAP alias becomes a model entry; the underlying Edge TTS
    voice name is exposed in "description".

    Fix: the route decorator was missing, so the endpoint was never
    registered with FastAPI.
    """
    model_list = [
        {
            "id": name,
            "object": "model",
            "owned_by": "edge-tts",
            "description": edge_voice,
        }
        for name, edge_voice in VOICE_MAP.items()
    ]
    return {"object": "list", "data": model_list}
# ============================================================
# /v1/audio/speech — Main OpenAI-TTS-compatible endpoint
# ============================================================
@app.post("/v1/audio/speech")
async def speech(request: Request):
    """Synthesize speech from a JSON body compatible with OpenAI TTS.

    Expected body fields:
        input           text to speak (required, non-empty)
        voice           VOICE_MAP alias or a raw Edge TTS voice name
                        (default "fr-denise" — French by design)
        speed           playback rate multiplier, 1.0 = normal
        response_format mp3 | opus | aac | flac | wav (default mp3)

    Returns the synthesized audio as a streaming response, 400 on bad
    input, 500 on synthesis failure.

    Fixes: the route decorator was missing (endpoint never registered);
    a non-numeric "speed" now yields a 400 instead of a 500; the rate
    string is always signed ("+0%" not "0%"), which Edge TTS requires.
    """
    try:
        data = await request.json()
        text = data.get("input", "")
        voice = data.get("voice", "fr-denise")  # default = French!
        response_format = data.get("response_format", "mp3")

        try:
            speed = float(data.get("speed", 1.0))
        except (TypeError, ValueError):
            return JSONResponse(
                status_code=400,
                content={"error": "Le champ 'speed' doit être un nombre"}
            )

        if not text or not text.strip():
            return JSONResponse(
                status_code=400,
                content={"error": "Le champ 'input' est vide"}
            )

        # Resolve the voice: either a VOICE_MAP alias or a raw Edge TTS name.
        edge_voice = VOICE_MAP.get(voice, voice)

        # Edge TTS expects an explicitly signed percentage, e.g. "+20%"/"-50%".
        pct = int((speed - 1) * 100)
        rate = f"+{pct}%" if pct >= 0 else f"{pct}%"

        # Generate audio: buffer all audio chunks, then stream them back.
        communicate = edge_tts.Communicate(text, edge_voice, rate=rate)
        audio_data = io.BytesIO()
        async for chunk in communicate.stream():
            if chunk["type"] == "audio":
                audio_data.write(chunk["data"])
        audio_data.seek(0)

        # Content-Type by requested format (fallback: mp3).
        content_types = {
            "mp3": "audio/mpeg",
            "opus": "audio/opus",
            "aac": "audio/aac",
            "flac": "audio/flac",
            "wav": "audio/wav",
        }
        media_type = content_types.get(response_format, "audio/mpeg")
        return StreamingResponse(audio_data, media_type=media_type)
    except Exception as e:
        # Top-level boundary: surface synthesis/network errors as a 500.
        return JSONResponse(
            status_code=500,
            content={"error": str(e)}
        )
# ============================================================
# /v1/voices — Bonus endpoint listing mapped voices with details
# ============================================================
@app.get("/v1/voices")
async def list_voices():
    """List every VOICE_MAP voice with its language and multilingual flag.

    The language is derived from the Edge TTS name prefix (e.g.
    "fr-FR-DeniseNeural" -> "fr-FR").

    Fix: the route decorator was missing, so the endpoint was never
    registered with FastAPI.
    """
    voices = []
    for alias, edge_name in VOICE_MAP.items():
        # Extract the locale from the Edge TTS voice name.
        parts = edge_name.split("-")
        lang = f"{parts[0]}-{parts[1]}" if len(parts) >= 2 else "unknown"
        voices.append({
            "alias": alias,
            "edge_voice": edge_name,
            "language": lang,
            "multilingual": "Multilingual" in edge_name,
        })
    return {"voices": voices}
# ============================================================
# /v1/voices/all — List ALL available Edge TTS voices
# ============================================================
@app.get("/v1/voices/all")
async def all_edge_voices():
    """Dynamically fetch every voice Edge TTS currently offers.

    Returns {"count": N, "voices": [...]} or a 500 with the error
    message if the remote voice list cannot be retrieved.

    Fix: the route decorator was missing, so the endpoint was never
    registered with FastAPI.
    """
    try:
        voices = await edge_tts.list_voices()
        return {
            "count": len(voices),
            "voices": voices
        }
    except Exception as e:
        # Network/service failure fetching the voice catalog.
        return JSONResponse(
            status_code=500,
            content={"error": str(e)}
        )
# ============================================================
# /v1/voices/language/{lang} — Filter voices by language
# ============================================================
@app.get("/v1/voices/language/{lang}")
async def voices_by_language(lang: str):
    """Filter the full Edge TTS voice list by locale prefix.

    Examples: /v1/voices/language/fr or /v1/voices/language/fr-FR
    Matching is case-insensitive on the voice's "Locale" field.

    Fix: the route decorator was missing, so the endpoint was never
    registered with FastAPI.
    """
    try:
        all_voices = await edge_tts.list_voices()
        wanted = lang.lower()
        filtered = [
            v for v in all_voices
            if v["Locale"].lower().startswith(wanted)
        ]
        return {
            "language": lang,
            "count": len(filtered),
            "voices": filtered
        }
    except Exception as e:
        # Network/service failure fetching the voice catalog.
        return JSONResponse(
            status_code=500,
            content={"error": str(e)}
        )
# NOTE(review): path inferred from the function name — no section comment
# names this endpoint; confirm the intended route.
@app.get("/health")
async def health():
    """Liveness probe: reports status and the number of mapped voices.

    Fix: the route decorator was missing, so the endpoint was never
    registered with FastAPI.
    """
    return {"status": "healthy", "voices_loaded": len(VOICE_MAP)}