meraj12 committed on
Commit
a9bed7f
·
verified ·
1 Parent(s): 4f74f3b

Update voice_cloner.py

Browse files
Files changed (1) hide show
  1. voice_cloner.py +21 -9
voice_cloner.py CHANGED
@@ -1,18 +1,30 @@
1
- from transformers import BarkModel, BarkProcessor
 
 
2
  import torch
 
3
 
4
- # You could switch to xtts-v2 or Tortoise if needed
5
- def clone_and_generate_text(text, reference_audio_path, output_path="clone_output.wav"):
6
- processor = BarkProcessor.from_pretrained("suno/bark")
7
  model = BarkModel.from_pretrained("suno/bark")
8
 
 
 
9
 
10
- # Preprocess input
11
- inputs = processor(text=text, voice_preset="v2/en_speaker_9", return_tensors="pt").to("cpu")
 
 
12
 
13
- # Generate speech
14
- speech = model.generate(**inputs)
 
 
 
15
 
16
- # Save output
 
 
 
17
  torchaudio.save(output_path, speech.cpu(), 22050)
18
  return output_path
 
1
+ # voice_cloner.py
2
+ from transformers import BarkModel, AutoProcessor
3
+ import torchaudio
4
  import torch
5
+ import os
6
 
7
def clone_and_generate_text(text, reference_audio_path, language="English", emotion="Neutral",
                            output_path="output_voice.wav"):
    """Synthesize *text* with Bark and write the result to a WAV file.

    Args:
        text: The text to speak.
        reference_audio_path: Path to a reference audio clip. The clip is
            loaded so that an unreadable path raises, but it is not passed
            to the processor or the model; the voice comes from the fixed
            ``v2/en_speaker_9`` preset.
        language: Accepted for interface compatibility; currently unused.
        emotion: Accepted for interface compatibility; currently unused.
        output_path: Where the generated WAV file is written.

    Returns:
        The path the audio was written to (``output_path``).
    """
    # Validate the reference clip before the expensive model load so a bad
    # path fails immediately. The decoded audio itself is not used below.
    torchaudio.load(reference_audio_path)

    processor = AutoProcessor.from_pretrained("suno/bark")
    model = BarkModel.from_pretrained("suno/bark")

    device = "cuda" if torch.cuda.is_available() else "cpu"
    model.to(device)

    inputs = processor(
        text=text,
        voice_preset="v2/en_speaker_9",  # generic fallback voice
        return_tensors="pt",
    ).to(device)

    with torch.no_grad():
        speech = model.generate(**inputs)

    # Use the rate the model actually generates at rather than a hard-coded
    # value; writing with a mismatched rate shifts playback speed and pitch.
    sample_rate = model.generation_config.sample_rate
    torchaudio.save(output_path, speech.cpu(), sample_rate)
    return output_path