Spaces:
Sleeping
Sleeping
| # DocVoice.py | |
| import torch | |
| from transformers import pipeline | |
| import soundfile as sf | |
# -------------------
# 1. Detect GPU
# -------------------
# transformers pipelines take a device index: 0 selects the first CUDA
# device, -1 keeps the model on the CPU.
use_cuda = torch.cuda.is_available()
device = 0 if use_cuda else -1
print(f"Using {'GPU' if use_cuda else 'CPU'}")

# -------------------
# 2. Load TTS model
# -------------------
tts_model_id = "microsoft/speecht5_tts"  # Compatible TTS model
# Built once at import time; text_to_speech() reuses this single instance.
tts_pipe = pipeline(
    "text-to-speech",
    model=tts_model_id,
    device=device,
)
print("TTS pipeline ready using Hugging Face.")
# -------------------
# 3. TTS Helper Function
# -------------------
def text_to_speech(
    text: str,
    filename="assistant_response.wav",
    speaker_embeddings=None,
):
    """
    Generate speech from text and save it as a WAV file.

    Args:
        text: Text to synthesize. ``None``, empty, or whitespace-only input
            is skipped without calling the model.
        filename: Path of the WAV file to write.
        speaker_embeddings: Optional speaker embedding forwarded to the model
            through the pipeline's ``forward_params``. When omitted, the
            pipeline is called with the text only.
            NOTE(review): recent transformers releases appear to require a
            speaker embedding for SpeechT5 -- confirm against the model card
            and pass one from the caller if so.

    Returns:
        ``filename`` once the audio has been written, or ``None`` when there
        was no text to synthesize.
    """
    # Guard against None as well as blank strings; None has no .strip().
    if not text or not text.strip():
        return None

    print(f"Generating audio for: {text}")

    # Only pass forward_params when the caller supplied an embedding, so the
    # default call stays identical to a plain ``tts_pipe(text)``.
    call_kwargs = {}
    if speaker_embeddings is not None:
        call_kwargs["forward_params"] = {"speaker_embeddings": speaker_embeddings}
    result = tts_pipe(text, **call_kwargs)

    # A single string yields one dict; tolerate a list-shaped result as well.
    if isinstance(result, (list, tuple)):
        result = result[0]

    # The text-to-speech pipeline returns
    # {"audio": ndarray, "sampling_rate": int}; there is no "array" key.
    speech_array = result["audio"]

    # Prefer the rate reported with the audio; fall back to the model config,
    # then to 16 kHz, only if the pipeline did not report one.
    sample_rate = result.get("sampling_rate")
    if sample_rate is None:
        sample_rate = getattr(tts_pipe.model.config, "sampling_rate", 16000)

    # Save audio
    sf.write(filename, speech_array, sample_rate)
    print(f"Audio saved as {filename}")
    return filename