h-rand committed on
Commit
2d0b034
·
verified ·
1 Parent(s): d85d9a7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -6
app.py CHANGED
@@ -1,4 +1,4 @@
1
- from fastapi import FastAPI, Response, HTTPException
2
  from fastapi.middleware.cors import CORSMiddleware
3
  from transformers import VitsModel, AutoTokenizer
4
  import torch
@@ -44,7 +44,13 @@ def load_model():
44
  load_model()
45
 
46
  @app.post("/tts")
47
- async def generate_speech(data: dict):
 
 
 
 
 
 
48
  # Rechargement si nécessaire (Cold start)
49
  if model is None:
50
  if not load_model():
@@ -64,14 +70,24 @@ async def generate_speech(data: dict):
64
  with torch.no_grad():
65
  output = model(**inputs).waveform
66
 
67
- # 3. Conversion Audio
68
- # Le modèle sort du float32 (-1.0 à 1.0)
69
  audio_array = output.float().numpy().squeeze()
70
  sample_rate = model.config.sampling_rate
71
 
 
 
 
 
 
 
 
 
 
 
72
  # 4. Écriture WAV en mémoire
73
  buffer = io.BytesIO()
74
- scipy.io.wavfile.write(buffer, rate=sample_rate, data=audio_array)
75
  buffer.seek(0)
76
 
77
  return Response(content=buffer.read(), media_type="audio/wav")
@@ -82,4 +98,4 @@ async def generate_speech(data: dict):
82
  @app.get("/")
83
  def home():
84
  status = "Ready ✅" if model else "Error ❌"
85
- return {"status": status, "lang": "mlg (Malagasy)"}
 
1
+ from fastapi import FastAPI, Response, HTTPException, Request
2
  from fastapi.middleware.cors import CORSMiddleware
3
  from transformers import VitsModel, AutoTokenizer
4
  import torch
 
44
  load_model()
45
 
46
  @app.post("/tts")
47
+ async def generate_speech(request: Request, data: dict):
48
+
49
+ # 🛡️ SÉCURITÉ (Décommente ces 3 lignes si tu veux bloquer les accès hors Cloudflare)
50
+ # client_token = request.headers.get("x-dynamic-token")
51
+ # if not client_token:
52
+ # raise HTTPException(status_code=403, detail="Accès refusé")
53
+
54
  # Rechargement si nécessaire (Cold start)
55
  if model is None:
56
  if not load_model():
 
70
  with torch.no_grad():
71
  output = model(**inputs).waveform
72
 
73
+ # 3. Conversion Audio & COMPRESSION
74
+ # Le modèle sort du float32 (très lourd)
75
  audio_array = output.float().numpy().squeeze()
76
  sample_rate = model.config.sampling_rate
77
 
78
+ # --- 🚀 OPTIMISATION : Division de la taille par 2 ---
79
+ # Normalisation (Met la voix au volume maximum sans grésiller)
80
+ max_amp = np.max(np.abs(audio_array))
81
+ if max_amp > 0:
82
+ audio_array = audio_array / max_amp
83
+
84
+ # Conversion de Float32 (32-bits) vers Int16 (16-bits)
85
+ audio_int16 = (audio_array * 32767.0).astype(np.int16)
86
+ # -----------------------------------------------------
87
+
88
  # 4. Écriture WAV en mémoire
89
  buffer = io.BytesIO()
90
+ scipy.io.wavfile.write(buffer, rate=sample_rate, data=audio_int16)
91
  buffer.seek(0)
92
 
93
  return Response(content=buffer.read(), media_type="audio/wav")
 
@app.get("/")
def home():
    """Health-check endpoint.

    Returns a small JSON status document:
    - ``status``: "Ready ✅" when the module-level ``model`` global is loaded
      (truthy), "Error ❌" otherwise.
    - ``lang``: the fixed language tag served by this TTS service.
    - ``optimized``: marker indicating the Int16 WAV compression path is active.
    """
    # `model` is a module-level global populated by load_model() at startup
    # (and lazily on cold start inside /tts); truthiness signals readiness.
    is_ready = bool(model)
    status = "Ready ✅" if is_ready else "Error ❌"
    return {
        "status": status,
        "lang": "mlg (Malagasy)",
        "optimized": "Int16 Compression Active",
    }