File size: 3,453 Bytes
f20a8ad
 
 
 
 
7a85341
f20a8ad
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7a85341
 
 
 
f20a8ad
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7a85341
 
f20a8ad
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
import sys
import re
import asyncio
from supertonic import TTS
import base_model
import utils
class StreamingEngine(base_model.BaseEngine):
    """Text-to-speech engine backed by ``supertonic.TTS``.

    Maps a fixed set of voice aliases ("alloy", "echo", ...) onto the
    backend voice names ("F1", "M1", ...), strips characters the backend's
    text processor rejects, and synthesizes audio.
    """

    def __init__(self, name):
        """Set up the voice configuration, then delegate to the base class.

        Args:
            name: Engine name, forwarded unchanged to ``BaseEngine.__init__``.
        """
        # Voice configuration is assigned before super().__init__() so it is
        # already available if the base initializer triggers load_model().
        # NOTE(review): presumably BaseEngine.__init__ calls load_model() and
        # sets self.name -- confirm against base_model.
        self.default_voice = "F1"
        self.voice_mapping = {
            "alloy": "F1",
            "echo": "M1",
            "fable": "M2",
            "onyx": "M3",
            "nova": "F2",
            "shimmer": "F3",
        }

        super().__init__(name)

    def load_model(self):
        """Instantiate the TTS backend and cache its text processor and rate.

        Sets ``self.tts``, ``self.text_processor`` and ``self.sample_rate``.

        Raises:
            RuntimeError: If the backend cannot be created or inspected; the
                original exception is chained as ``__cause__``.
        """
        try:
            self.tts = TTS(auto_download=True)
            self.text_processor = self.tts.model.text_processor
            self.sample_rate = self.tts.sample_rate
            print(f"Model Loaded. Rate: {self.sample_rate}")
        except Exception as e:
            # Raise instead of exiting the process so the caller decides how
            # to react to a model that failed to load.
            print(f"Error initializing model {self.name}: {e}")
            raise RuntimeError(f"Failed to load model {self.name}") from e

    def get_style_safe(self, voice_name: str):
        """Return the backend voice style for ``voice_name``.

        The name is lower-cased and stripped, then looked up in
        ``self.voice_mapping``; names that are not in the mapping (including
        ``None`` or an empty string) resolve to ``self.default_voice``. If the
        backend rejects the resolved voice, the default voice is tried.

        Args:
            voice_name: Voice alias such as "alloy" or "nova".

        Returns:
            Whatever ``self.tts.get_voice_style`` returns for the resolved
            voice.

        Raises:
            Exception: Re-raises the backend error when the default voice
                cannot be loaded either.
        """
        # `or ""` keeps a missing voice (None) from raising AttributeError;
        # an empty key is not in the mapping, so it resolves to the default.
        clean_name = (voice_name or "").lower().strip()
        target_name = self.voice_mapping.get(clean_name, self.default_voice)
        print(f"Found voice {target_name}")

        try:
            return self.tts.get_voice_style(voice_name=target_name)
        except Exception:
            print(f"WARNING: Voice '{voice_name}' (mapped to '{target_name}') not found. Using '{self.default_voice}'.")

            try:
                return self.tts.get_voice_style(voice_name=self.default_voice)
            except Exception:
                # Nothing left to fall back to; surface the backend error.
                print(f"CRITICAL: Default voice '{self.default_voice}' also failed.")
                raise

    def preprocess_text(self, text):
        """Remove unsupported characters from ``text`` and split it in chunks.

        Args:
            text: Raw input text; a falsy value yields an empty list.

        Returns:
            The result of ``utils.split_text_into_sentences`` applied to the
            cleaned text with ``min_chunk_size=150``, or ``[]`` when there is
            no text to process.
        """
        if not text:
            return []

        is_valid, unsupported = self.text_processor.validate_text(text)

        if not is_valid:
            print(f"   ⚠️  Contains {len(unsupported)} unsupported character(s): {unsupported[:5]}")
            # A translate() deletion table removes every reported character
            # and, unlike a regex character class, stays valid when the
            # validator reports no characters at all (an empty "[]" pattern
            # is a regex compile error).
            deletion_table = str.maketrans("", "", "".join(unsupported))
            preprocessed = text.translate(deletion_table)

            if preprocessed != text:
                print(f"   After preprocessing: {preprocessed[:50]}...")
                text = preprocessed
        else:
            print("   ✓ All characters supported")

        # Stripping may have removed everything; treat that like empty input.
        if not text:
            return []

        return utils.split_text_into_sentences(text, min_chunk_size=150)

    def generate(self, chunks: str, voice_name: str, speed: float):
        """Synthesize audio for ``chunks`` and yield it.

        This is a generator: it yields exactly one item, the first element of
        the pair returned by ``self.tts.synthesize``.

        Args:
            chunks: Text handed to the backend as its first argument.
            voice_name: Forwarded positionally as the backend's second
                argument. NOTE(review): the backend may expect the style
                object produced by ``get_style_safe`` rather than a plain
                name -- confirm against the caller.
            speed: Currently ignored; it is not forwarded to the backend.

        Yields:
            The audio returned by the backend.
        """
        audio, _ = self.tts.synthesize(chunks, voice_name)
        yield audio