Muhammadidrees committed on
Commit
b100d07
·
verified ·
1 Parent(s): c0ebb5e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -0
app.py CHANGED
@@ -508,6 +508,30 @@ with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:
508
  # =============================
509
  # Voice Chat Functions
510
  # =============================
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
511
  def process_voice_input(audio, history, temp, max_tok, topk, session_id):
512
  """Process voice input: transcribe, get response, convert to speech"""
513
  if audio is None:
 
508
  # =============================
509
  # Voice Chat Functions
510
  # =============================
511
_BARK_CACHE = {}


def _load_bark(model_id="suno/bark-small"):
    """Return the (processor, model) pair for *model_id*, loading it only once."""
    if model_id not in _BARK_CACHE:
        # Imported lazily so the heavy dependency is only pulled in when
        # speech synthesis is actually requested.
        from transformers import AutoProcessor, BarkModel

        _BARK_CACHE[model_id] = (
            AutoProcessor.from_pretrained(model_id),
            BarkModel.from_pretrained(model_id),
        )
    return _BARK_CACHE[model_id]


def text_to_speech(text):
    """Convert *text* to speech using Bark.

    Args:
        text: The text to synthesize.

    Returns:
        A ``(sampling_rate, audio_data)`` tuple where ``audio_data`` is a
        float32 NumPy array clipped to ``[-1.0, 1.0]``.
    """
    import numpy as np

    # Reuse the processor/model across calls instead of re-loading the
    # checkpoint for every request.
    processor, model = _load_bark("suno/bark-small")

    inputs = processor(text, voice_preset="v2/en_speaker_6", return_tensors="pt")

    # BarkModel.generate returns the waveform as a tensor (not a dict with
    # "audio"/"sampling_rate" keys, which is the text-to-speech *pipeline*
    # output format), so convert it to a NumPy array explicitly.
    speech = model.generate(**inputs)
    if hasattr(speech, "cpu"):
        speech = speech.cpu().numpy()
    audio_data = np.asarray(speech, dtype=np.float32).squeeze()

    # The output sample rate is stored on the model's generation config.
    sampling_rate = int(model.generation_config.sample_rate)

    # Clip to the valid float range; per the original note this avoids a
    # struct.error when the audio is later encoded.
    audio_data = np.clip(audio_data, -1.0, 1.0).astype(np.float32)

    return (sampling_rate, audio_data)
534
+
535
  def process_voice_input(audio, history, temp, max_tok, topk, session_id):
536
  """Process voice input: transcribe, get response, convert to speech"""
537
  if audio is None: