File size: 7,089 Bytes
78cb487
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
import torch
from pathlib import Path
import json
import os


def load_config():
    """Loads configuration from config.json.

    Returns:
        dict: The configuration loaded from the JSON file.

    Raises:
        FileNotFoundError: If the config.json file is not found.
        json.JSONDecodeError: If the file exists but contains invalid JSON.
    """
    # Config lives two levels up from this module: <project>/config/config.json.
    config_path = Path(__file__).parent.parent / "config" / "config.json"
    if not config_path.exists():
        raise FileNotFoundError(f"Config file not found at {config_path}")
    # Explicit encoding: JSON is UTF-8 by spec, while the platform default
    # encoding is not guaranteed to be UTF-8 (e.g. legacy Windows codepages).
    with open(config_path, encoding="utf-8") as f:
        return json.load(f)


def get_available_voices(voices_dir):
    """Gets a list of available voice names without the .pt extension.

    Args:
        voices_dir (str): The path to the directory containing voice files.

    Returns:
        list: A list of voice names (strings); empty if the directory is missing.
    """
    directory = Path(voices_dir)
    if not directory.exists():
        return []
    # Each "*.pt" file contributes its bare stem (filename without suffix).
    return [voice_file.stem for voice_file in directory.glob("*.pt")]


def validate_voice_name(voice_name, voices_dir):
    """Validates that a voice name exists in the voices directory.

    Args:
        voice_name (str): The name of the voice to validate.
        voices_dir (str): The path to the directory containing voice files.

    Returns:
        bool: True if the voice name is valid.

    Raises:
        ValueError: If the voice name is not found in the voices directory.
    """
    available = get_available_voices(voices_dir)
    if voice_name in available:
        return True
    raise ValueError(
        f"Voice '{voice_name}' not found. Available voices: {', '.join(available)}"
    )


def load_voice(voice_name, voices_dir):
    """Loads a voice from the voices directory.

    Args:
        voice_name (str): The name of the voice to load.
        voices_dir (str): The path to the directory containing voice files.

    Returns:
        torch.Tensor: The loaded voice as a torch tensor.

    Raises:
        AssertionError: If the voices directory or voice file does not exist, or if the voice path is not a file.
        ValueError: If the voice name is not found in the voices directory.
        RuntimeError: If there is an error loading the voice file or converting it to a tensor.
    """
    voices_dir = Path(voices_dir)
    # Raise AssertionError explicitly (rather than via `assert`) so these
    # validation checks still run under `python -O`, which strips asserts,
    # while callers that catch AssertionError keep working.
    if not voices_dir.exists():
        raise AssertionError(f"Voices directory does not exist: {voices_dir}")
    if not voices_dir.is_dir():
        raise AssertionError(f"Voices path is not a directory: {voices_dir}")

    validate_voice_name(voice_name, voices_dir)

    voice_path = voices_dir / f"{voice_name}.pt"
    if not voice_path.exists():
        raise AssertionError(f"Voice file not found: {voice_path}")
    if not voice_path.is_file():
        raise AssertionError(f"Voice path is not a file: {voice_path}")

    try:
        # weights_only=True restricts unpickling to tensor data, avoiding
        # arbitrary code execution from untrusted checkpoint files.
        voice = torch.load(voice_path, weights_only=True)
    except Exception as e:
        # Chain the original exception so the root cause stays in the traceback.
        raise RuntimeError(f"Error loading voice file {voice_path}: {str(e)}") from e

    if not isinstance(voice, torch.Tensor):
        try:
            voice = torch.tensor(voice)
        except Exception as e:
            raise RuntimeError(f"Could not convert voice to tensor: {str(e)}") from e

    return voice


def quick_mix_voice(output_name, voices_dir, *voices, weights=None):
    """Mixes and saves voices with specified weights.

    Args:
        output_name (str): The name of the output mixed voice file (without extension).
        voices_dir (str): The path to the directory containing voice files.
        *voices (torch.Tensor): Variable number of voice tensors to mix.
        weights (list, optional): List of weights for each voice. Defaults to equal weights if None.

    Returns:
        torch.Tensor: The mixed voice as a torch tensor.

    Raises:
        ValueError: If no voices are provided, if any voice is not a tensor or has a
            mismatched shape, if the number of weights does not match the number of
            voices, or if the sum of weights is not positive.
        AssertionError: If the voices directory does not exist or is not a directory.
    """
    voices_dir = Path(voices_dir)
    assert voices_dir.exists(), f"Voices directory does not exist: {voices_dir}"
    assert voices_dir.is_dir(), f"Voices path is not a directory: {voices_dir}"

    if not voices:
        raise ValueError("Must provide at least one voice")

    # All voices must be tensors of identical shape so they can be stacked.
    base_shape = voices[0].shape
    for i, voice in enumerate(voices):
        if not isinstance(voice, torch.Tensor):
            raise ValueError(f"Voice {i} is not a tensor")
        if voice.shape != base_shape:
            raise ValueError(
                f"Voice {i} has shape {voice.shape}, but expected {base_shape} (same as first voice)"
            )

    if weights is None:
        # Equal contribution from every voice.
        weights = [1.0 / len(voices)] * len(voices)
    else:
        if len(weights) != len(voices):
            raise ValueError(
                f"Number of weights ({len(weights)}) must match number of voices ({len(voices)})"
            )
        weights_sum = sum(weights)
        if weights_sum <= 0:
            raise ValueError("Sum of weights must be positive")
        # Normalize so the effective weights always sum to 1.
        weights = [w / weights_sum for w in weights]

    # Move every voice onto the first voice's device before stacking.
    device = voices[0].device
    stacked = torch.stack([v.to(device) for v in voices])
    # Match the voices' dtype explicitly to avoid silent type promotion.
    weight_tensor = torch.tensor(weights, dtype=stacked.dtype, device=device)

    # Vectorized weighted sum: contracts the voice axis in one native op
    # instead of accumulating tensor-by-tensor in a Python loop.
    mixed = torch.tensordot(weight_tensor, stacked, dims=1)

    output_path = voices_dir / f"{output_name}.pt"
    torch.save(mixed, output_path)
    print(f"Created mixed voice: {output_name}.pt")
    return mixed


def split_into_sentences(text):
    """Splits text into sentences using more robust rules.

    Known abbreviations (e.g. "Mr.", "i.e.") are protected so their trailing
    periods are not treated as sentence boundaries. Sentences longer than 200
    characters are further split on commas when possible.

    Args:
        text (str): The input text to split.

    Returns:
        list: A list of sentences (strings).
    """
    import re

    text = text.strip()
    if not text:
        return []

    # Hide the periods inside known abbreviations behind a sentinel character
    # so they are not mistaken for sentence terminators. Using a sentinel
    # (instead of stripping the period and restoring it by substring search)
    # avoids corrupting ordinary words: the previous substring-based restore
    # turned e.g. "example" into "exa.m.ple" and "Mrs" into "Mr.s".
    sentinel = "\ue000"  # private-use codepoint; will not occur in normal text
    abbreviations = [
        "Mr.", "Mrs.", "Dr.", "Ms.", "Prof.", "Sr.", "Jr.", "vs.",
        "etc.", "i.e.", "e.g.", "a.m.", "p.m.",
    ]
    for abbr in abbreviations:
        text = text.replace(abbr, abbr.replace(".", sentinel))

    sentences = []
    current = []

    # Tokenize into alternating non-space/space chunks so joining the tokens
    # back together preserves the original spacing exactly.
    words = re.findall(r"\S+|\s+", text)

    for word in words:
        current.append(word)

        if re.search(r"[.!?]+$", word):
            # Short capitalized tokens like "Feb." are treated as
            # abbreviations, not sentence ends.
            if not re.match(r"^[A-Z][a-z]{1,2}$", word[:-1]):
                sentence = "".join(current).strip()
                if sentence:
                    sentences.append(sentence)
                current = []

    if current:
        sentence = "".join(current).strip()
        if sentence:
            sentences.append(sentence)

    # Restore the protected abbreviation periods; the sentinel only ever
    # stands for a period, so this replacement cannot touch other text.
    sentences = [s.replace(sentinel, ".") for s in sentences]

    sentences = [s.strip() for s in sentences if s.strip()]

    # Break up very long sentences at commas to keep chunks manageable.
    final_sentences = []
    for s in sentences:
        if len(s) > 200:
            parts = [p.strip() for p in s.split(",") if p.strip()]
            if len(parts) > 1:
                final_sentences.extend(parts)
            else:
                final_sentences.append(s)
        else:
            final_sentences.append(s)

    return final_sentences