saidchrf commited on
Commit
b538a81
·
verified ·
1 Parent(s): e70a374

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +28 -28
main.py CHANGED
@@ -1,22 +1,20 @@
1
- from fastapi import FastAPI, HTTPException
2
  from fastapi.responses import StreamingResponse
3
  from pydantic import BaseModel
4
  from kokoro import KPipeline
5
- import soundfile as sf
6
  import io
7
  import logging
8
- import time
9
 
10
- # --- Configurer les logs ---
11
  logging.basicConfig(level=logging.INFO)
12
- logger = logging.getLogger("kokoro-stream")
13
 
14
  app = FastAPI()
15
 
16
  # Initialiser le pipeline au démarrage
17
- logger.info("🔍 Initialisation du pipeline Kokoro...")
18
  pipeline = KPipeline(lang_code='a', device='cpu')
19
- logger.info("✅ Pipeline Kokoro initialisé")
20
 
21
  class TTSRequest(BaseModel):
22
  text: str
@@ -25,26 +23,28 @@ class TTSRequest(BaseModel):
25
 
26
@app.post("/tts/stream")
async def stream_speech(request: TTSRequest):
    """Synthesize `request.text` with the Kokoro pipeline and stream it back.

    Each pipeline result is serialized as a standalone WAV payload
    (24 kHz) and yielded as one HTTP chunk of the streaming response.

    NOTE(review): every chunk carries its own WAV header, so the
    concatenated stream is not a single valid WAV file — clients must
    parse chunk boundaries themselves. Confirm this matches consumer
    expectations.
    """
    logger.info(f"🚀 Streaming demandé pour le texte: '{request.text}'")
    start_time = time.time()

    def generate():
        chunk_count = 0
        results = pipeline(request.text, voice=request.voice, speed=request.speed)
        for chunk_count, result in enumerate(results, start=1):
            # Torch tensors expose .numpy(); anything else is used as-is.
            audio = result.audio
            audio_data = audio.numpy() if hasattr(audio, "numpy") else audio
            logger.info(f" Chunk {chunk_count} généré, taille={len(audio_data)} samples")
            wav_buffer = io.BytesIO()
            sf.write(wav_buffer, audio_data, 24000, format='WAV')
            yield wav_buffer.getvalue()
        elapsed = time.time() - start_time
        logger.info(f"🏁 Streaming terminé, {chunk_count} chunks envoyés en {elapsed:.2f}s")

    return StreamingResponse(generate(), media_type="audio/wav")
 
 
 
 
1
+ from fastapi import FastAPI
2
  from fastapi.responses import StreamingResponse
3
  from pydantic import BaseModel
4
  from kokoro import KPipeline
5
+ import numpy as np
6
  import io
7
  import logging
 
8
 
9
+ # --- Logging ---
10
  logging.basicConfig(level=logging.INFO)
11
+ logger = logging.getLogger("tts_stream")
12
 
13
  app = FastAPI()
14
 
15
  # Initialiser le pipeline au démarrage
 
16
  pipeline = KPipeline(lang_code='a', device='cpu')
17
+ logger.info("✅ KPipeline loaded successfully.")
18
 
19
  class TTSRequest(BaseModel):
20
  text: str
 
23
 
24
@app.post("/tts/stream")
async def stream_speech(request: TTSRequest):
    """Synthesize `request.text` with the Kokoro pipeline and stream raw PCM.

    Each pipeline result is converted to little-endian float32 samples and
    yielded as one HTTP chunk; sample rate / channel / bit-depth metadata is
    carried in custom `X-*` response headers so clients can interpret the
    headerless PCM stream.

    Raises:
        Any exception from the pipeline is logged with the failing chunk
        index and re-raised, aborting the stream.
    """
    logger.info(f"🚀 Streaming request received: text='{request.text[:50]}...', voice='{request.voice}', speed={request.speed}")

    def generate():
        chunk_index = 0
        try:
            for result in pipeline(request.text, voice=request.voice, speed=request.speed):
                chunk_index += 1
                # Bug fix: result.audio may be a torch tensor (has .numpy())
                # or already an ndarray-like. The previous revision guarded
                # this with hasattr; calling .numpy() unconditionally raises
                # AttributeError mid-stream for non-tensor audio.
                raw = result.audio
                audio = raw.numpy() if hasattr(raw, "numpy") else np.asarray(raw)
                # Convertir en PCM float32
                audio_bytes = audio.astype(np.float32).tobytes()
                logger.info(f"✅ Chunk {chunk_index} ready, size={len(audio_bytes)} bytes")
                yield audio_bytes
            logger.info(f"🏁 Streaming finished: total chunks={chunk_index}")
        except Exception as e:
            logger.error(f"❌ Streaming error at chunk {chunk_index}: {e}")
            raise

    return StreamingResponse(
        generate(),
        media_type="audio/pcm",
        headers={
            "X-Sample-Rate": "24000",
            "X-Channels": "1",
            "X-Bit-Depth": "32"
        }
    )