# DocVoice.py
"""Text-to-speech helper built on the Hugging Face ``text-to-speech`` pipeline.

Importing this module selects a device, loads the TTS pipeline once, and
exposes :func:`text_to_speech` for turning a string into a WAV file.
"""

import torch
from transformers import pipeline
import soundfile as sf

# -------------------
# 1. Detect GPU
# -------------------
use_cuda = torch.cuda.is_available()
# The pipeline API takes a CUDA device index, or -1 for CPU.
device = 0 if use_cuda else -1
print(f"🌟 Using {'GPU' if use_cuda else 'CPU'}")

# -------------------
# 2. Load TTS model
# -------------------
tts_model_id = "microsoft/speecht5_tts"  # Compatible TTS model
tts_pipe = pipeline(
    "text-to-speech",
    model=tts_model_id,
    device=device,
)
print("🔊 TTS pipeline ready using Hugging Face.")


# -------------------
# 3. TTS helper function
# -------------------
def text_to_speech(
    text: str,
    filename="assistant_response.wav",
    speaker_embeddings=None,
):
    """Generate speech from *text* and save it as a WAV file.

    Args:
        text: The text to synthesize. ``None``, empty, or whitespace-only
            input is skipped.
        filename: Destination path of the WAV file.
        speaker_embeddings: Optional speaker embedding forwarded to the model
            through the pipeline's ``forward_params``. When omitted, the
            pipeline is called with the text alone.
            NOTE(review): the SpeechT5 model card always supplies a speaker
            embedding (an x-vector tensor); generation without one may fail
            for this model -- confirm and pass one from the caller if needed.

    Returns:
        ``filename`` once the audio has been written, or ``None`` when there
        was no text to synthesize.
    """
    if not text or not text.strip():
        return None

    print(f"📝 Generating audio for: {text}")

    if speaker_embeddings is None:
        result = tts_pipe(text)
    else:
        result = tts_pipe(
            text,
            forward_params={"speaker_embeddings": speaker_embeddings},
        )

    # For a single string the pipeline returns a dict with "audio" and
    # "sampling_rate"; tolerate a list-shaped result by taking its first item.
    if isinstance(result, list):
        result = result[0]

    speech_array = result["audio"]
    # Use the rate reported alongside the audio instead of guessing one from
    # the model config.
    sample_rate = result["sampling_rate"]

    # Save audio
    sf.write(filename, speech_array, sample_rate)
    print(f"✅ Audio saved as {filename}")
    return filename