edgetts-api-openai

Sleeping

App Files Files Community

Voxxium committed on Feb 9

Commit

9e5940d

verified ·

1 Parent(s): 0b7d9c5

Update app.py

Browse files

Files changed (1) hide show

app.py +201 -20

app.py CHANGED Viewed

@@ -4,52 +4,233 @@ import edge_tts
 import io
 import asyncio
-app = FastAPI(title="Edge TTS OpenAI Compatible API")
 VOICE_MAP = {
-    "alloy": "en-US-JennyNeural",
-    "echo": "en-US-GuyNeural",
-    "fable": "en-GB-SoniaNeural",
-    "onyx": "en-US-ChristopherNeural",
-    "nova": "en-US-AriaNeural",
-    "shimmer": "en-US-AnaNeural",
 }
 @app.get("/")
 async def root():
-    return {"status": "ok", "message": "Edge TTS API - OpenAI Compatible"}
 @app.get("/v1/models")
 async def models():
-    return {"object": "list", "data": [{"id": v, "object": "model"} for v in VOICE_MAP.keys()]}
 @app.post("/v1/audio/speech")
 async def speech(request: Request):
     try:
         data = await request.json()
         text = data.get("input", "")
-        voice = data.get("voice", "alloy")
         speed = float(data.get("speed", 1.0))
         edge_voice = VOICE_MAP.get(voice, voice)
         if speed >= 1:
-            rate = f"+{int((speed-1)*100)}%"
         else:
-            rate = f"{int((speed-1)*100)}%"
         communicate = edge_tts.Communicate(text, edge_voice, rate=rate)
         audio_data = io.BytesIO()
         async for chunk in communicate.stream():
             if chunk["type"] == "audio":
                 audio_data.write(chunk["data"])
         audio_data.seek(0)
-        return StreamingResponse(audio_data, media_type="audio/mpeg")
     except Exception as e:
-        return JSONResponse(status_code=500, content={"error": str(e)})
 @app.get("/health")
 async def health():
-    return {"status": "healthy"}

 import io
 import asyncio
+app = FastAPI(title="Edge TTS OpenAI Compatible API - Multilingual")
+# ============================================================
+# VOICE MAP: OpenAI-compatible names -> Edge TTS voices
+# AnythingLLM sends the "model" name as the "voice" field
+# ============================================================
 VOICE_MAP = {
+    # ---------- FRENCH ----------
+    "fr-denise":       "fr-FR-DeniseNeural",        # FR female (natural)
+    "fr-henri":        "fr-FR-HenriNeural",         # FR male
+    "fr-eloise":       "fr-FR-EloiseNeural",        # FR female (young)
+    "fr-ca-sylvie":    "fr-CA-SylvieNeural",        # FR Canada female
+    "fr-ca-antoine":   "fr-CA-AntoineNeural",       # FR Canada male
+    "fr-be-charline":  "fr-BE-CharlineNeural",      # FR Belgium female
+    "fr-be-gerard":    "fr-BE-GerardNeural",        # FR Belgium male
+    "fr-ch-ariane":    "fr-CH-ArianeNeural",        # FR Switzerland female
+    "fr-ch-fabrice":   "fr-CH-FabriceNeural",       # FR Switzerland male
+    # ---------- ENGLISH ----------
+    "alloy":           "en-US-JennyNeural",         # EN-US female
+    "echo":            "en-US-GuyNeural",           # EN-US male
+    "fable":           "en-GB-SoniaNeural",         # EN-GB female
+    "onyx":            "en-US-ChristopherNeural",   # EN-US male (deep)
+    "nova":            "en-US-AriaNeural",          # EN-US female
+    "shimmer":         "en-US-AnaNeural",           # EN-US female (young)
+    "en-jenny":        "en-US-JennyNeural",
+    "en-guy":          "en-US-GuyNeural",
+    "en-aria":         "en-US-AriaNeural",
+    "en-davis":        "en-US-DavisNeural",
+    "en-tony":         "en-US-TonyNeural",
+    # ---------- MULTILINGUAL (these voices speak SEVERAL languages) ----------
+    "multi-jenny":     "en-US-JennyMultilingualNeural",    # ⭐ Multilingual
+    "multi-ryan":      "en-US-RyanMultilingualNeural",     # ⭐ Multilingual
+    "multi-ava":       "en-US-AvaMultilingualNeural",      # ⭐ Multilingual
+    "multi-andrew":    "en-US-AndrewMultilingualNeural",   # ⭐ Multilingual
+    "multi-emma":      "en-US-EmmaMultilingualNeural",     # ⭐ Multilingual
+    "multi-brian":     "en-US-BrianMultilingualNeural",    # ⭐ Multilingual
+    "multi-vivienne":  "fr-FR-VivienneMultilingualNeural", # ⭐ FR Multilingual
+    "multi-remy":      "fr-FR-RemyMultilingualNeural",     # ⭐ FR Multilingual
+    # ---------- SPANISH ----------
+    "es-elvira":       "es-ES-ElviraNeural",
+    "es-alvaro":       "es-ES-AlvaroNeural",
+    # ---------- GERMAN ----------
+    "de-katja":        "de-DE-KatjaNeural",
+    "de-conrad":       "de-DE-ConradNeural",
+    # ---------- ITALIAN ----------
+    "it-elsa":         "it-IT-ElsaNeural",
+    "it-diego":        "it-IT-DiegoNeural",
+    # ---------- PORTUGUESE ----------
+    "pt-francisca":    "pt-BR-FranciscaNeural",
+    "pt-antonio":      "pt-BR-AntonioNeural",
+    # ---------- JAPANESE ----------
+    "ja-nanami":       "ja-JP-NanamiNeural",
+    "ja-keita":        "ja-JP-KeitaNeural",
+    # ---------- CHINESE ----------
+    "zh-xiaoxiao":     "zh-CN-XiaoxiaoNeural",
+    "zh-yunyang":      "zh-CN-YunyangNeural",
+    # ---------- ARABIC ----------
+    "ar-salma":        "ar-SA-SalmaNeural",
+    "ar-hamed":        "ar-SA-HamedNeural",
 }
 @app.get("/")
 async def root():
+    return {
+        "status": "ok",
+        "message": "Edge TTS API - OpenAI Compatible - Multilingual",
+        "voices_count": len(VOICE_MAP),
+        "languages": ["fr-FR", "fr-CA", "fr-BE", "fr-CH",
+                       "en-US", "en-GB", "es-ES", "de-DE",
+                       "it-IT", "pt-BR", "ja-JP", "zh-CN", "ar-SA",
+                       "multilingual"]
+    }
+# ============================================================
+# /v1/models  — AnythingLLM appelle cet endpoint pour lister
+# les voix disponibles dans le dropdown
+# ============================================================
 @app.get("/v1/models")
 async def models():
+    model_list = []
+    for name, edge_voice in VOICE_MAP.items():
+        model_list.append({
+            "id": name,
+            "object": "model",
+            "owned_by": "edge-tts",
+            "description": edge_voice
+        })
+    return {"object": "list", "data": model_list}
+# ============================================================
+# /v1/audio/speech  — Endpoint principal compatible OpenAI TTS
+# ============================================================
 @app.post("/v1/audio/speech")
 async def speech(request: Request):
     try:
         data = await request.json()
         text = data.get("input", "")
+        voice = data.get("voice", "fr-denise")  # défaut = français !
         speed = float(data.get("speed", 1.0))
+        response_format = data.get("response_format", "mp3")
+        if not text or not text.strip():
+            return JSONResponse(
+                status_code=400,
+                content={"error": "Le champ 'input' est vide"}
+            )
+        # Résoudre la voix : soit un alias du map, soit un nom Edge TTS direct
         edge_voice = VOICE_MAP.get(voice, voice)
+        # Calcul du rate
         if speed >= 1:
+            rate = f"+{int((speed - 1) * 100)}%"
         else:
+            rate = f"{int((speed - 1) * 100)}%"
+        # Génération audio
         communicate = edge_tts.Communicate(text, edge_voice, rate=rate)
         audio_data = io.BytesIO()
         async for chunk in communicate.stream():
             if chunk["type"] == "audio":
                 audio_data.write(chunk["data"])
         audio_data.seek(0)
+        # Content-type selon le format
+        content_types = {
+            "mp3": "audio/mpeg",
+            "opus": "audio/opus",
+            "aac": "audio/aac",
+            "flac": "audio/flac",
+            "wav": "audio/wav",
+        }
+        media_type = content_types.get(response_format, "audio/mpeg")
+        return StreamingResponse(audio_data, media_type=media_type)
     except Exception as e:
+        return JSONResponse(
+            status_code=500,
+            content={"error": str(e)}
+        )
+# ============================================================
+# /v1/voices — Endpoint bonus pour lister les voix avec détails
+# ============================================================
+@app.get("/v1/voices")
+async def list_voices():
+    """Liste toutes les voix avec leur langue et genre"""
+    voices = []
+    for alias, edge_name in VOICE_MAP.items():
+        # Extraire la langue du nom Edge TTS
+        parts = edge_name.split("-")
+        if len(parts) >= 2:
+            lang = f"{parts[0]}-{parts[1]}"
+        else:
+            lang = "unknown"
+        voices.append({
+            "alias": alias,
+            "edge_voice": edge_name,
+            "language": lang,
+            "multilingual": "Multilingual" in edge_name
+        })
+    return {"voices": voices}
+# ============================================================
+# /v1/voices/all — Liste TOUTES les voix Edge TTS disponibles
+# ============================================================
+@app.get("/v1/voices/all")
+async def all_edge_voices():
+    """Récupère dynamiquement toutes les voix Edge TTS disponibles"""
+    try:
+        voices = await edge_tts.list_voices()
+        return {
+            "count": len(voices),
+            "voices": voices
+        }
+    except Exception as e:
+        return JSONResponse(
+            status_code=500,
+            content={"error": str(e)}
+        )
+# ============================================================
+# /v1/voices/language/{lang} — Filtrer par langue
+# ============================================================
+@app.get("/v1/voices/language/{lang}")
+async def voices_by_language(lang: str):
+    """
+    Filtrer les voix par langue.
+    Exemples: /v1/voices/language/fr  ou  /v1/voices/language/fr-FR
+    """
+    try:
+        all_voices = await edge_tts.list_voices()
+        filtered = [
+            v for v in all_voices
+            if v["Locale"].lower().startswith(lang.lower())
+        ]
+        return {
+            "language": lang,
+            "count": len(filtered),
+            "voices": filtered
+        }
+    except Exception as e:
+        return JSONResponse(
+            status_code=500,
+            content={"error": str(e)}
+        )
 @app.get("/health")
 async def health():
+    return {"status": "healthy", "voices_loaded": len(VOICE_MAP)}