Abdalkaderdev committed on
Commit
bf9e8b9
·
1 Parent(s): 15307bb

Switch back to Supertonic 2 TTS for CPU compatibility

Browse files
Files changed (1) hide show
  1. app/ora_server.py +11 -14
app/ora_server.py CHANGED
@@ -240,7 +240,7 @@ async def detect_emotion(req: EmotionRequest):
240
 
241
 
242
 
243
- # TTS endpoint using Bark (Natural, Expressive Voice)
244
  tts_model = None
245
  tts_processor = None
246
 
@@ -248,16 +248,17 @@ tts_processor = None
248
  async def load_tts():
249
  global tts_model, tts_processor
250
  try:
251
- print("Loading Bark TTS for natural voice...")
252
- from transformers import AutoProcessor, BarkModel
253
 
254
- tts_processor = AutoProcessor.from_pretrained("suno/bark-small")
255
- tts_model = BarkModel.from_pretrained("suno/bark-small")
 
256
 
257
  if device == "cuda":
258
  tts_model = tts_model.to("cuda")
259
 
260
- print("✓ Bark TTS loaded - Natural voice ready!")
261
  except Exception as e:
262
  print(f"Could not load TTS model: {e}")
263
  print("Voice will fall back to browser TTS.")
@@ -273,24 +274,20 @@ async def text_to_speech(req: TTSRequest):
273
  raise HTTPException(status_code=503, detail="TTS model not loaded, use browser fallback")
274
 
275
  try:
276
- # Use Bark with natural voice preset
277
- inputs = tts_processor(
278
- text=req.text,
279
- voice_preset="v2/en_speaker_6", # Warm, natural female voice
280
- return_tensors="pt"
281
- )
282
 
283
  if device == "cuda":
284
  inputs = {k: v.to("cuda") for k, v in inputs.items()}
285
 
286
  with torch.no_grad():
287
- audio_array = tts_model.generate(**inputs)
288
 
289
  # Convert to WAV format
290
  import io
291
  import wave
292
 
293
- audio_np = audio_array.cpu().numpy().squeeze()
294
 
295
  # Normalize to 16-bit PCM
296
  audio_np = (audio_np * 32767).astype('int16')
 
240
 
241
 
242
 
243
+ # TTS endpoint using Supertonic 2 (CPU-friendly)
244
  tts_model = None
245
  tts_processor = None
246
 
 
248
  async def load_tts():
249
  global tts_model, tts_processor
250
  try:
251
+ print("Loading Supertonic 2 TTS...")
252
+ from transformers import AutoProcessor, AutoModelForTextToWaveform
253
 
254
+ model_id = "Supertone/supertonic-2"
255
+ tts_processor = AutoProcessor.from_pretrained(model_id)
256
+ tts_model = AutoModelForTextToWaveform.from_pretrained(model_id)
257
 
258
  if device == "cuda":
259
  tts_model = tts_model.to("cuda")
260
 
261
+ print("✓ Supertonic 2 TTS loaded successfully!")
262
  except Exception as e:
263
  print(f"Could not load TTS model: {e}")
264
  print("Voice will fall back to browser TTS.")
 
274
  raise HTTPException(status_code=503, detail="TTS model not loaded, use browser fallback")
275
 
276
  try:
277
+ # Process text with Supertonic 2
278
+ inputs = tts_processor(text=req.text, return_tensors="pt", sampling_rate=24000)
 
 
 
 
279
 
280
  if device == "cuda":
281
  inputs = {k: v.to("cuda") for k, v in inputs.items()}
282
 
283
  with torch.no_grad():
284
+ audio_values = tts_model.generate(**inputs)
285
 
286
  # Convert to WAV format
287
  import io
288
  import wave
289
 
290
+ audio_np = audio_values.cpu().numpy().squeeze()
291
 
292
  # Normalize to 16-bit PCM
293
  audio_np = (audio_np * 32767).astype('int16')