Muhammadidrees committed on
Commit
9e218ef
·
verified ·
1 Parent(s): c7de4ce

Update DocVoice.py

Browse files
Files changed (1) hide show
  1. DocVoice.py +15 -25
DocVoice.py CHANGED
@@ -1,25 +1,24 @@
1
  # DocVoice.py
2
  import torch
3
  from transformers import pipeline
 
4
 
5
  # -------------------
6
  # 1️⃣ Detect GPU
7
  # -------------------
8
  use_cuda = torch.cuda.is_available()
9
- device_index = 0 if use_cuda else -1
10
- device_str = "cuda" if use_cuda else "cpu"
11
- dtype = torch.float16 if use_cuda else torch.float32
12
 
13
  # -------------------
14
- # 2️⃣ Load TTS model from Hugging Face
15
  # -------------------
16
- tts_model_id = "espnet/kan-bayashi_ljspeech_vits" # Example TTS model, English voice
17
 
18
  tts_pipe = pipeline(
19
  "text-to-speech",
20
  model=tts_model_id,
21
- device=device_index,
22
- torch_dtype=dtype
23
  )
24
 
25
  print("🔊 TTS pipeline ready using Hugging Face.")
@@ -32,23 +31,14 @@ def text_to_speech(text: str, filename="assistant_response.wav"):
32
  Generate speech from text and save as WAV file.
33
  """
34
  if not text.strip():
35
- return
36
-
37
  print(f"📝 Generating audio for: {text}")
38
-
39
- # Generate audio
40
- speech_array = tts_pipe(text)["audio"]
41
-
42
- # Convert to int16 and save as WAV
43
- import numpy as np
44
- import scipy.io.wavfile as wav
45
-
46
- wav.write(filename, 22050, (speech_array * 32767).astype(np.int16))
47
  print(f"✅ Audio saved as {filename}")
48
-
49
- # Optional: play audio automatically (requires sounddevice)
50
- try:
51
- import sounddevice as sd
52
- sd.play(speech_array, samplerate=22050)
53
- except Exception as e:
54
- print(f"⚠️ Could not play audio automatically: {e}")
 
1
  # DocVoice.py
2
  import torch
3
  from transformers import pipeline
4
+ import soundfile as sf
5
 
6
  # -------------------
7
  # 1️⃣ Detect GPU
8
  # -------------------
9
  use_cuda = torch.cuda.is_available()
10
+ device = 0 if use_cuda else -1
11
+ print(f"🌟 Using {'GPU' if use_cuda else 'CPU'}")
 
12
 
13
  # -------------------
14
+ # 2️⃣ Load TTS model
15
  # -------------------
16
+ tts_model_id = "microsoft/speecht5_tts" # Compatible TTS model
17
 
18
  tts_pipe = pipeline(
19
  "text-to-speech",
20
  model=tts_model_id,
21
+ device=device
 
22
  )
23
 
24
  print("🔊 TTS pipeline ready using Hugging Face.")
 
31
  Generate speech from text and save as WAV file.
32
  """
33
  if not text.strip():
34
+ return None
35
+
36
  print(f"📝 Generating audio for: {text}")
37
+
38
+ speech_array = tts_pipe(text)[0]["array"] # returns numpy array
39
+ sample_rate = tts_pipe.model.config.sampling_rate if hasattr(tts_pipe.model.config, "sampling_rate") else 16000
40
+
41
+ # Save audio
42
+ sf.write(filename, speech_array, sample_rate)
 
 
 
43
  print(f"✅ Audio saved as {filename}")
44
+ return filename