# Hugging Face Space scrape residue removed ("Spaces: Sleeping"); see DocVoice.py below.
# DocVoice.py
import torch
from transformers import pipeline

# -------------------
# 1) Device detection: prefer the first CUDA GPU when available,
#    otherwise fall back to CPU with full-precision floats.
# -------------------
use_cuda = torch.cuda.is_available()
device_index, device_str, dtype = (
    (0, "cuda", torch.float16) if use_cuda else (-1, "cpu", torch.float32)
)
# -------------------
# 2) Build the text-to-speech pipeline from the Hugging Face Hub.
#    Model weights are downloaded on first use and cached locally.
# -------------------
tts_model_id = "espnet/kan-bayashi_ljspeech_vits"  # Example TTS model, English voice
tts_pipe = pipeline(
    task="text-to-speech",
    model=tts_model_id,
    device=device_index,
    torch_dtype=dtype,
)
print("🔊 TTS pipeline ready using Hugging Face.")
| # ------------------- | |
| # 3️⃣ TTS Helper Function | |
| # ------------------- | |
def text_to_speech(text: str, filename: str = "assistant_response.wav") -> None:
    """
    Synthesize speech for *text* with the module-level ``tts_pipe`` and save it
    as a 16-bit PCM WAV file.

    Parameters
    ----------
    text : str
        Text to speak. Blank / whitespace-only input is silently ignored.
    filename : str
        Output WAV path (default ``"assistant_response.wav"``).

    Returns
    -------
    None. Side effects: writes ``filename`` and, when the optional
    ``sounddevice`` package is importable, starts playback.
    """
    if not text.strip():
        return
    print(f"📝 Generating audio for: {text}")
    # HF text-to-speech pipelines return {"audio": float ndarray, "sampling_rate": int}.
    result = tts_pipe(text)
    speech_array = result["audio"]
    # Use the model's reported rate instead of hard-coding 22050 Hz (a wrong
    # rate makes playback too fast/slow); keep 22050 as a fallback only.
    sample_rate = int(result.get("sampling_rate", 22050))
    import numpy as np
    import scipy.io.wavfile as wav
    # Clip to [-1, 1] before scaling so samples at/over full scale cannot
    # wrap around when cast to int16.
    pcm = (np.clip(speech_array, -1.0, 1.0) * 32767).astype(np.int16)
    wav.write(filename, sample_rate, pcm)
    # Fix: the original printed a literal "(unknown)" here instead of the path.
    print(f"✅ Audio saved as {filename}")
    # Optional: play audio automatically (requires sounddevice); best-effort,
    # so a missing package or audio backend only warns rather than crashing.
    try:
        import sounddevice as sd
        sd.play(speech_array, samplerate=sample_rate)
    except Exception as e:
        print(f"⚠️ Could not play audio automatically: {e}")