Voxxium committed on
Commit
9e5940d
·
verified ·
1 Parent(s): 0b7d9c5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +201 -20
app.py CHANGED
@@ -4,52 +4,233 @@ import edge_tts
4
  import io
5
  import asyncio
6
 
7
- app = FastAPI(title="Edge TTS OpenAI Compatible API")
8
 
 
 
 
 
9
  VOICE_MAP = {
10
- "alloy": "en-US-JennyNeural",
11
- "echo": "en-US-GuyNeural",
12
- "fable": "en-GB-SoniaNeural",
13
- "onyx": "en-US-ChristopherNeural",
14
- "nova": "en-US-AriaNeural",
15
- "shimmer": "en-US-AnaNeural",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  }
17
 
 
18
  @app.get("/")
19
  async def root():
20
- return {"status": "ok", "message": "Edge TTS API - OpenAI Compatible"}
 
 
 
 
 
 
 
 
 
21
 
 
 
 
 
22
  @app.get("/v1/models")
23
  async def models():
24
- return {"object": "list", "data": [{"id": v, "object": "model"} for v in VOICE_MAP.keys()]}
 
 
 
 
 
 
 
 
25
 
 
 
 
 
26
  @app.post("/v1/audio/speech")
27
  async def speech(request: Request):
28
  try:
29
  data = await request.json()
30
  text = data.get("input", "")
31
- voice = data.get("voice", "alloy")
32
  speed = float(data.get("speed", 1.0))
33
-
 
 
 
 
 
 
 
 
34
  edge_voice = VOICE_MAP.get(voice, voice)
35
-
 
36
  if speed >= 1:
37
- rate = f"+{int((speed-1)*100)}%"
38
  else:
39
- rate = f"{int((speed-1)*100)}%"
40
-
 
41
  communicate = edge_tts.Communicate(text, edge_voice, rate=rate)
42
-
43
  audio_data = io.BytesIO()
44
  async for chunk in communicate.stream():
45
  if chunk["type"] == "audio":
46
  audio_data.write(chunk["data"])
47
-
48
  audio_data.seek(0)
49
- return StreamingResponse(audio_data, media_type="audio/mpeg")
 
 
 
 
 
 
 
 
 
 
 
 
50
  except Exception as e:
51
- return JSONResponse(status_code=500, content={"error": str(e)})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
 
53
  @app.get("/health")
54
  async def health():
55
- return {"status": "healthy"}
 
4
  import io
5
  import asyncio
6
 
7
+ app = FastAPI(title="Edge TTS OpenAI Compatible API - Multilingual")
8
 
9
# ============================================================
# VOICE MAP: OpenAI-compatible alias -> Edge TTS voice name
# AnythingLLM sends the "model" name as the "voice" field
# ============================================================
VOICE_MAP = {
    # ---------- FRENCH ----------
    "fr-denise": "fr-FR-DeniseNeural",            # FR female (natural)
    "fr-henri": "fr-FR-HenriNeural",              # FR male
    "fr-eloise": "fr-FR-EloiseNeural",            # FR female (young)
    "fr-ca-sylvie": "fr-CA-SylvieNeural",         # Canadian FR female
    "fr-ca-antoine": "fr-CA-AntoineNeural",       # Canadian FR male
    "fr-be-charline": "fr-BE-CharlineNeural",     # Belgian FR female
    "fr-be-gerard": "fr-BE-GerardNeural",         # Belgian FR male
    "fr-ch-ariane": "fr-CH-ArianeNeural",         # Swiss FR female
    "fr-ch-fabrice": "fr-CH-FabriceNeural",       # Swiss FR male

    # ---------- ENGLISH ----------
    "alloy": "en-US-JennyNeural",                 # EN-US female
    "echo": "en-US-GuyNeural",                    # EN-US male
    "fable": "en-GB-SoniaNeural",                 # EN-GB female
    "onyx": "en-US-ChristopherNeural",            # EN-US male (deep)
    "nova": "en-US-AriaNeural",                   # EN-US female
    "shimmer": "en-US-AnaNeural",                 # EN-US female (young)
    "en-jenny": "en-US-JennyNeural",
    "en-guy": "en-US-GuyNeural",
    "en-aria": "en-US-AriaNeural",
    "en-davis": "en-US-DavisNeural",
    "en-tony": "en-US-TonyNeural",

    # ---------- MULTILINGUAL (these voices speak SEVERAL languages) ----------
    "multi-jenny": "en-US-JennyMultilingualNeural",        # ⭐ Multilingual
    "multi-ryan": "en-US-RyanMultilingualNeural",          # ⭐ Multilingual
    "multi-ava": "en-US-AvaMultilingualNeural",            # ⭐ Multilingual
    "multi-andrew": "en-US-AndrewMultilingualNeural",      # ⭐ Multilingual
    "multi-emma": "en-US-EmmaMultilingualNeural",          # ⭐ Multilingual
    "multi-brian": "en-US-BrianMultilingualNeural",        # ⭐ Multilingual
    "multi-vivienne": "fr-FR-VivienneMultilingualNeural",  # ⭐ FR Multilingual
    "multi-remy": "fr-FR-RemyMultilingualNeural",          # ⭐ FR Multilingual

    # ---------- SPANISH ----------
    "es-elvira": "es-ES-ElviraNeural",
    "es-alvaro": "es-ES-AlvaroNeural",

    # ---------- GERMAN ----------
    "de-katja": "de-DE-KatjaNeural",
    "de-conrad": "de-DE-ConradNeural",

    # ---------- ITALIAN ----------
    "it-elsa": "it-IT-ElsaNeural",
    "it-diego": "it-IT-DiegoNeural",

    # ---------- PORTUGUESE ----------
    "pt-francisca": "pt-BR-FranciscaNeural",
    "pt-antonio": "pt-BR-AntonioNeural",

    # ---------- JAPANESE ----------
    "ja-nanami": "ja-JP-NanamiNeural",
    "ja-keita": "ja-JP-KeitaNeural",

    # ---------- CHINESE ----------
    "zh-xiaoxiao": "zh-CN-XiaoxiaoNeural",
    "zh-yunyang": "zh-CN-YunyangNeural",

    # ---------- ARABIC ----------
    "ar-salma": "ar-SA-SalmaNeural",
    "ar-hamed": "ar-SA-HamedNeural",
}
76
 
77
+
78
@app.get("/")
async def root():
    """Service-info endpoint: reports status, voice count and covered languages."""
    covered_languages = [
        "fr-FR", "fr-CA", "fr-BE", "fr-CH",
        "en-US", "en-GB", "es-ES", "de-DE",
        "it-IT", "pt-BR", "ja-JP", "zh-CN", "ar-SA",
        "multilingual",
    ]
    return {
        "status": "ok",
        "message": "Edge TTS API - OpenAI Compatible - Multilingual",
        "voices_count": len(VOICE_MAP),
        "languages": covered_languages,
    }
89
+
90
 
91
# ============================================================
# /v1/models — AnythingLLM calls this endpoint to populate the
# voice dropdown
# ============================================================
@app.get("/v1/models")
async def models():
    """Expose each voice alias as an OpenAI-style model entry.

    Returns an OpenAI "list" object whose ``data`` items carry the alias
    as ``id`` and the underlying Edge TTS voice name as ``description``.
    """
    # Comprehension instead of a manual append loop (same output order,
    # since dicts preserve insertion order).
    return {
        "object": "list",
        "data": [
            {
                "id": name,
                "object": "model",
                "owned_by": "edge-tts",
                "description": edge_voice,
            }
            for name, edge_voice in VOICE_MAP.items()
        ],
    }
106
 
107
+
108
# ============================================================
# /v1/audio/speech — main OpenAI-compatible TTS endpoint
# ============================================================
@app.post("/v1/audio/speech")
async def speech(request: Request):
    """Synthesize speech from a JSON body {input, voice, speed, response_format}.

    Returns a streaming audio response on success, a 400 JSON error for
    invalid input, and a 500 JSON error for synthesis failures.
    """
    try:
        data = await request.json()
        text = data.get("input", "")
        voice = data.get("voice", "fr-denise")  # default voice is French
        response_format = data.get("response_format", "mp3")

        if not text or not text.strip():
            return JSONResponse(
                status_code=400,
                content={"error": "Le champ 'input' est vide"}
            )

        # A non-numeric speed is a client error: answer 400 instead of
        # letting float() raise and fall through to the generic 500.
        try:
            speed = float(data.get("speed", 1.0))
        except (TypeError, ValueError):
            return JSONResponse(
                status_code=400,
                content={"error": "'speed' must be a number"}
            )

        # Resolve the voice: either an alias from the map or a raw Edge TTS name.
        edge_voice = VOICE_MAP.get(voice, voice)

        # Rate as a signed percentage. round() instead of int() so that
        # e.g. speed=0.9 -> "-10%" (int() truncates -9.999... to -9%).
        delta = round((speed - 1) * 100)
        rate = f"+{delta}%" if delta >= 0 else f"{delta}%"

        # Accumulate the Edge TTS audio stream into an in-memory buffer.
        communicate = edge_tts.Communicate(text, edge_voice, rate=rate)
        audio_data = io.BytesIO()
        async for chunk in communicate.stream():
            if chunk["type"] == "audio":
                audio_data.write(chunk["data"])
        audio_data.seek(0)

        # Content-type derived from the requested format.
        # NOTE(review): edge-tts produces mp3 by default regardless of
        # response_format, so for non-mp3 formats the header may not match
        # the actual bytes — confirm before relying on it.
        content_types = {
            "mp3": "audio/mpeg",
            "opus": "audio/opus",
            "aac": "audio/aac",
            "flac": "audio/flac",
            "wav": "audio/wav",
        }
        media_type = content_types.get(response_format, "audio/mpeg")

        return StreamingResponse(audio_data, media_type=media_type)

    except Exception as e:
        # Top-level boundary: surface the failure as a JSON 500.
        return JSONResponse(
            status_code=500,
            content={"error": str(e)}
        )
162
+
163
+
164
# ============================================================
# /v1/voices — bonus endpoint listing voices with details
# ============================================================
@app.get("/v1/voices")
async def list_voices():
    """List every mapped voice with its language and a multilingual flag."""

    def _describe(alias, edge_name):
        # Derive the locale ("xx-YY") from the Edge TTS voice name.
        pieces = edge_name.split("-")
        locale = f"{pieces[0]}-{pieces[1]}" if len(pieces) >= 2 else "unknown"
        return {
            "alias": alias,
            "edge_voice": edge_name,
            "language": locale,
            "multilingual": "Multilingual" in edge_name,
        }

    return {"voices": [_describe(a, n) for a, n in VOICE_MAP.items()]}
186
+
187
+
188
# ============================================================
# /v1/voices/all — list EVERY available Edge TTS voice
# ============================================================
@app.get("/v1/voices/all")
async def all_edge_voices():
    """Dynamically fetch the complete Edge TTS voice catalogue."""
    try:
        catalogue = await edge_tts.list_voices()
    except Exception as e:
        return JSONResponse(
            status_code=500,
            content={"error": str(e)}
        )
    return {
        "count": len(catalogue),
        "voices": catalogue
    }
205
+
206
+
207
# ============================================================
# /v1/voices/language/{lang} — filter by language
# ============================================================
@app.get("/v1/voices/language/{lang}")
async def voices_by_language(lang: str):
    """
    Filter the Edge TTS voices by language prefix.
    Examples: /v1/voices/language/fr or /v1/voices/language/fr-FR
    """
    try:
        prefix = lang.lower()  # hoist the lowering out of the loop
        matching = []
        for entry in await edge_tts.list_voices():
            if entry["Locale"].lower().startswith(prefix):
                matching.append(entry)
        return {
            "language": lang,
            "count": len(matching),
            "voices": matching
        }
    except Exception as e:
        return JSONResponse(
            status_code=500,
            content={"error": str(e)}
        )
232
+
233
 
234
@app.get("/health")
async def health():
    """Liveness probe; also reports how many voice aliases are mapped."""
    report = {"status": "healthy"}
    report["voices_loaded"] = len(VOICE_MAP)
    return report