RiayatechChatDoctor / DocVoice.py
Muhammadidrees's picture
Update DocVoice.py
0109f78 verified
raw
history blame
1.48 kB
# DocVoice.py
import torch
from transformers import pipeline
# -------------------
# 1️⃣ Detect GPU
# -------------------
# Decide once, at import time, where the model runs and at which precision.
#   device_index -> value passed as the pipeline's `device` argument
#                   (0 when CUDA is available, -1 otherwise)
#   device_str   -> the same choice spelled as a device name ("cuda" / "cpu")
#   dtype        -> half precision with CUDA, full precision without it
use_cuda = torch.cuda.is_available()
if use_cuda:
    device_index, device_str, dtype = 0, "cuda", torch.float16
else:
    device_index, device_str, dtype = -1, "cpu", torch.float32
# -------------------
# 2️⃣ Load TTS model from Hugging Face
# -------------------
# Hub identifier of the text-to-speech checkpoint (example English voice).
# NOTE(review): this id looks like an ESPnet checkpoint; confirm that
# `transformers.pipeline` can actually load it.
tts_model_id = "espnet/kan-bayashi_ljspeech_vits"  # Example TTS model, English voice

# Build the shared pipeline once at import time; text_to_speech() reuses it
# for every request. Device and precision come from the detection step above.
tts_pipe = pipeline(
    task="text-to-speech",
    model=tts_model_id,
    torch_dtype=dtype,
    device=device_index,
)
print("🔊 TTS pipeline ready using Hugging Face.")
# -------------------
# 3️⃣ TTS Helper Function
# -------------------
def text_to_speech(text: str, filename="assistant_response.wav"):
    """
    Generate speech from text, save it as a 16-bit PCM WAV file and try to play it.

    Args:
        text: Text to synthesize. ``None``, empty or whitespace-only input is
            ignored: nothing is generated, written or played.
        filename: Path of the WAV file to write.

    Returns:
        None. The result is the file written to ``filename`` (and, when
        possible, audio playback).
    """
    # Guard against None as well as blank strings so callers that forward an
    # empty model reply do not crash on `.strip()`.
    if not text or not text.strip():
        return
    print(f"📝 Generating audio for: {text}")

    # Imported lazily so importing this module does not require numpy/scipy
    # until audio is actually produced.
    import numpy as np
    import scipy.io.wavfile as wav

    # Generate audio
    result = tts_pipe(text)

    # Use the sampling rate reported by the pipeline; a hard-coded rate yields
    # pitch/speed-shifted audio for any model that is not 22.05 kHz. 22050 is
    # kept only as a fallback when the pipeline does not report a rate.
    sample_rate = int(result.get("sampling_rate", 22050))

    # Work in float32: with fp16 inference the array may be float16, which is
    # too coarse for scaling and is not a sample format sounddevice accepts.
    # Squeeze singleton axes so a (1, N) output is written as N mono samples
    # rather than one frame with N channels.
    speech_array = np.atleast_1d(
        np.squeeze(np.asarray(result["audio"], dtype=np.float32))
    )

    # Clip to the nominal [-1, 1] range first; otherwise samples that
    # overshoot wrap around when cast to int16 and produce loud clicks.
    speech_array = np.clip(speech_array, -1.0, 1.0)

    # Convert to int16 and save as WAV
    pcm = (speech_array * 32767).astype(np.int16)
    wav.write(filename, sample_rate, pcm)
    print(f"✅ Audio saved as {filename}")

    # Optional: play audio automatically (requires sounddevice).
    # Playback is best-effort: a missing package, a missing audio backend or
    # the absence of an output device must not fail the request, so any
    # error is reported and swallowed deliberately.
    try:
        import sounddevice as sd
        sd.play(speech_array, samplerate=sample_rate)
    except Exception as e:
        print(f"⚠️ Could not play audio automatically: {e}")