import torch
from TTS.api import TTS
import os
import numpy as np
from scipy.io import wavfile

# Accept the Coqui TTS terms of service non-interactively so model
# instantiation below does not block on an interactive prompt.
os.environ["COQUI_TOS_AGREED"] = "1"
| |
# Keep a reference to the unpatched torch.load so the wrapper defined
# below can delegate to it after adjusting its keyword arguments.
_original_torch_load = torch.load
def custom_torch_load(*args, **kwargs):
    """Delegate to the original torch.load, defaulting weights_only to False.

    Callers that explicitly pass ``weights_only`` are left untouched; only
    the default is changed (presumably to allow loading checkpoints that
    pickle arbitrary objects — confirm against the torch version in use).
    """
    kwargs.setdefault("weights_only", False)
    return _original_torch_load(*args, **kwargs)
|
|
| |
| torch.load = custom_torch_load |
|
|
# Prefer GPU inference when CUDA is available; otherwise fall back to CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")
|
|
|
|
| |
def create_audiobook(english_transcript, speaker_list):
    """Synthesize a single audio track from a list of transcript segments.

    Parameters
    ----------
    english_transcript : iterable of dict
        Each item must have a ``'text'`` key (the sentence to speak) and a
        ``'char_id'`` key: an index into ``speaker_list`` selecting the
        character's voice, or ``None`` for narration.
    speaker_list : sequence of str
        XTTS speaker names with underscores in place of spaces;
        ``speaker_list[-1]`` is used as the narrator / fallback voice.

    Returns
    -------
    tuple[np.ndarray, int]
        The concatenation of all successfully synthesized segment waveforms
        and the synthesizer's output sample rate.

    Raises
    ------
    RuntimeError
        If no segment could be synthesized (nothing to concatenate).
    """
    parent_wav = []
    tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)

    for segment in english_transcript:
        text = segment['text']
        print(segment['char_id'], text)

        # Pick the character's voice; fall back to the last entry
        # (narrator) when no character is attached to this segment.
        # NOTE: use `is not None` — `!= None` is non-idiomatic and can
        # misbehave with objects overriding __eq__.
        if segment['char_id'] is not None:
            voice_speaker = speaker_list[segment['char_id']]
        else:
            voice_speaker = speaker_list[-1]
        print("voice person", voice_speaker)

        try:
            # The speaker list stores underscores where XTTS expects
            # spaces, so translate before synthesis.
            wav = tts.tts(
                text=text,
                speaker=voice_speaker.replace('_', ' '),
                language="en"
            )
            print("✓ Audio Generated")
            parent_wav.append(wav)
        except Exception as e:
            # Deliberate best-effort: skip the failing segment and keep
            # going, but report WHICH text failed (the original message
            # omitted it).
            print(f"✗ Error with {text!r}: {e}")

    if not parent_wav:
        # np.concatenate on an empty list raises an opaque ValueError;
        # fail with an explicit, actionable message instead.
        raise RuntimeError("No audio segments were generated; cannot build audiobook")

    combined = np.concatenate(parent_wav)
    return combined, tts.synthesizer.output_sample_rate
|
|
| |