CrazyMonkey0 committed on
Commit
2a3f624
·
1 Parent(s): 5f3ceca

feat(tts): migrate Kokoro TTS to Hugging Face facebook/mms-tts-eng with in-memory optimization

Browse files

- Replaced the previous Kokoro-based TTS pipeline with Hugging Face AutoModelForTextToWaveform.
- Now generates the WAV audio in memory, without saving it to disk.
- Uses the tokenizer and model stored in app.state, following FastAPI conventions.
- Returns audio as bytes or a URL (depending on implementation), improving performance and reducing I/O.

Files changed (1) hide show
  1. app/routes/tts.py +1 -1
app/routes/tts.py CHANGED
@@ -4,7 +4,6 @@ from fastapi.responses import StreamingResponse
4
  import numpy as np
5
  import scipy.io.wavfile as sf
6
  import torch
7
- import uuid
8
  import io
9
 
10
 
@@ -22,6 +21,7 @@ def load_model_tts():
22
 
23
 
24
  def save_audio(request: Request, text: str,) -> bytes:
 
25
  model, tokenizer = request.app.state.model_tts, request.app.state.tokenizer_tts
26
  inputs = tokenizer(text, return_tensors="pt")
27
 
 
4
  import numpy as np
5
  import scipy.io.wavfile as sf
6
  import torch
 
7
  import io
8
 
9
 
 
21
 
22
 
23
  def save_audio(request: Request, text: str,) -> bytes:
24
+ """ function to generate audio from text using TTS model """
25
  model, tokenizer = request.app.state.model_tts, request.app.state.tokenizer_tts
26
  inputs = tokenizer(text, return_tensors="pt")
27