Spaces:

Muhammadidrees
/

RiayatechChatDoctor

Build error

Muhammadidrees committed on Sep 29, 2025

Commit

9e218ef

verified ·

1 Parent(s): c7de4ce

Update DocVoice.py

Files changed (1) hide show

DocVoice.py CHANGED Viewed

@@ -1,25 +1,24 @@
 # DocVoice.py
 import torch
 from transformers import pipeline
 # -------------------
 # 1️⃣ Detect GPU
 # -------------------
 use_cuda = torch.cuda.is_available()
-device_index = 0 if use_cuda else -1
-device_str = "cuda" if use_cuda else "cpu"
-dtype = torch.float16 if use_cuda else torch.float32
 # -------------------
-# 2️⃣ Load TTS model from Hugging Face
 # -------------------
-tts_model_id = "espnet/kan-bayashi_ljspeech_vits"  # Example TTS model, English voice
 tts_pipe = pipeline(
     "text-to-speech",
     model=tts_model_id,
-    device=device_index,
-    torch_dtype=dtype
 )
 print("🔊 TTS pipeline ready using Hugging Face.")
@@ -32,23 +31,14 @@ def text_to_speech(text: str, filename="assistant_response.wav"):
     Generate speech from text and save as WAV file.
     """
     if not text.strip():
-        return
     print(f"📝 Generating audio for: {text}")
-    # Generate audio
-    speech_array = tts_pipe(text)["audio"]
-    # Convert to int16 and save as WAV
-    import numpy as np
-    import scipy.io.wavfile as wav
-    wav.write(filename, 22050, (speech_array * 32767).astype(np.int16))
     print(f"✅ Audio saved as {filename}")
-    # Optional: play audio automatically (requires sounddevice)
-    try:
-        import sounddevice as sd
-        sd.play(speech_array, samplerate=22050)
-    except Exception as e:
-        print(f"⚠️ Could not play audio automatically: {e}")

 # DocVoice.py
 import torch
 from transformers import pipeline
+import soundfile as sf
 # -------------------
 # 1️⃣ Detect GPU
 # -------------------
 use_cuda = torch.cuda.is_available()
+device = 0 if use_cuda else -1
+print(f"🌟 Using {'GPU' if use_cuda else 'CPU'}")
 # -------------------
+# 2️⃣ Load TTS model
 # -------------------
+tts_model_id = "microsoft/speecht5_tts"  # Compatible TTS model
 tts_pipe = pipeline(
     "text-to-speech",
     model=tts_model_id,
+    device=device
 )
 print("🔊 TTS pipeline ready using Hugging Face.")
     Generate speech from text and save as WAV file.
     """
     if not text.strip():
+        return None
     print(f"📝 Generating audio for: {text}")
+    speech_array = tts_pipe(text)[0]["array"]  # returns numpy array
+    sample_rate = tts_pipe.model.config.sampling_rate if hasattr(tts_pipe.model.config, "sampling_rate") else 16000
+    # Save audio
+    sf.write(filename, speech_array, sample_rate)
     print(f"✅ Audio saved as {filename}")
+    return filename