File size: 3,220 Bytes
68a99fc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
# Ignore UserWarning/FutureWarning and raise the root logger threshold to ERROR
# before the remaining imports run, so later imports inherit these settings.
import warnings
warnings.filterwarnings("ignore", category=UserWarning)
warnings.filterwarnings("ignore", category=FutureWarning)
import logging
logging.getLogger().setLevel(logging.ERROR)

import argparse
import os
import sys
import time

# Process-wide engine instance: created by the first initiate() call and
# reused by every later call.
TTS_ENGINE = None
# Set at import time, i.e. before torch is imported lazily inside initiate().
# NOTE(review): CUDA_LAUNCH_BLOCKING=1 is documented as forcing synchronous
# CUDA kernel launches (a debugging aid) — confirm it is wanted in production.
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"

def server_mode(args):
    """Serve synthesis requests read line by line from standard input.

    Each line is split on the literal separator ``voice``: the text before
    it is parsed as the speech speed (float) and the text after it as the
    voice index (int), e.g. ``1.2voice3``.  A part that is missing or not
    numeric falls back to speed ``1`` / voice ``8``.  After every line
    ``initiate(args)`` is called and its return value is printed and flushed
    on standard output.

    The loop ends when standard input reaches end-of-file.

    Args:
        args: Mutable options object; its ``speed`` and ``voice`` attributes
            are overwritten for every request.
    """
    while True:
        line = sys.stdin.readline()
        if not line:
            # readline() returns "" only at EOF (a blank line is "\n").
            # Without this check a closed stdin makes the loop spin forever.
            break

        parts = line.strip().split("voice")

        try:
            args.speed = float(parts[0])
        except ValueError:
            args.speed = 1

        try:
            args.voice = int(parts[1])
        except (IndexError, ValueError):
            # IndexError: no "voice" separator; ValueError: non-numeric index.
            args.voice = 8

        output_path = initiate(args)

        print(output_path)
        sys.stdout.flush()

def current_env():
    """Return the name of the active virtual environment.

    The name is the last path component of the ``VIRTUAL_ENV`` environment
    variable.

    Raises:
        ValueError: If ``VIRTUAL_ENV`` is unset or empty.
    """
    active_venv = os.getenv("VIRTUAL_ENV")
    if not active_venv:
        raise ValueError("Please set env first")
    _, env_name = os.path.split(active_venv)
    return env_name

def _get_arg(args, name, default=None):
    """Read option *name* from *args*, which may be a dict or a namespace."""
    if isinstance(args, dict):
        return args.get(name, default)
    return getattr(args, name, default)


def _load_engine_class(model):
    """Import and return the processor class for *model*.

    With no explicit model the choice is derived from the active virtual
    environment name; anything unrecognised falls back to chatterbox.
    """
    if not model:
        model = {"kokoro_env": "kokoro", "kitten_env": "kitten"}.get(current_env())

    # Imports stay lazy so only the selected engine's dependencies are loaded.
    if model == "kokoro":
        from .engines.kokoro import KokoroTTSProcessor as engine_class
    elif model == "kitten":
        from .engines.kitten import KittenTTSProcessor as engine_class
    else:
        from .engines.chatterbox import ChatterboxTTSProcessor as engine_class
    return engine_class


def _clear_cuda_cache():
    """Best-effort release of cached CUDA memory before a synthesis run."""
    try:
        import torch
        import gc
        torch.cuda.empty_cache()
        torch.cuda.synchronize()
        gc.collect()
        gc.collect()
        time.sleep(1)
        print("\n🧹 Cleared PyTorch CUDA cache")
    except Exception:
        # Deliberately non-fatal (torch missing, CUDA unavailable, ...), but
        # no longer swallows KeyboardInterrupt/SystemExit like a bare except.
        logging.getLogger(__name__).debug(
            "Skipped CUDA cache cleanup", exc_info=True
        )


def initiate(args):
    """Run one synthesis request with the shared TTS engine.

    The engine is created on the first call and stored in the module-level
    ``TTS_ENGINE``; later calls reuse it, so a different ``model`` on a later
    call has no effect.  The engine class comes from ``args.model`` or, when
    that is empty, from the active virtual environment name.

    Args:
        args: Options as an ``argparse.Namespace``-like object or a dict.
            ``model`` and ``stream_text`` are read here; the object itself is
            passed on to the engine.

    Returns:
        ``True`` after a streaming run; otherwise whatever
        ``TTS_ENGINE.save_audio(args)`` returns.  ``server_mode`` prints this
        value and ``main`` treats it as a success flag.
        NOTE(review): confirm that ``save_audio`` returns the output path.

    Raises:
        ValueError: From ``current_env`` when the engine must be created, no
            model is given and ``VIRTUAL_ENV`` is not set.
    """
    global TTS_ENGINE
    stream_text = _get_arg(args, "stream_text", False)

    if TTS_ENGINE is None:
        engine_class = _load_engine_class(_get_arg(args, "model"))
        TTS_ENGINE = engine_class(stream_audio=stream_text)

    _clear_cuda_cache()

    if stream_text:
        TTS_ENGINE.stream_real_time_text(args)
        text = TTS_ENGINE.read_content_file()
        for text_chunk in text.split():
            TTS_ENGINE.feed_text_chunk(text_chunk)
            time.sleep(0.1)  # Optional delay between fed words

        TTS_ENGINE.stop_all_streaming()
        return True

    return TTS_ENGINE.save_audio(args)


def main():
    """Parse command-line options and run the TTS processor.

    With ``--server-mode`` requests are read from stdin via ``server_mode``;
    otherwise a single ``initiate`` run is performed.

    Returns:
        int: ``0`` when server mode ends or ``initiate`` returns a truthy
        value, ``1`` otherwise.
    """
    parser = argparse.ArgumentParser(description="Text-to-Speech processor")
    parser.add_argument(
        "--server-mode",
        action="store_true",
        help="Run in server mode (read commands from stdin)",
    )
    parser.add_argument("--speed", type=float, help="Speech speed")
    parser.add_argument("--voice", type=int, help="Voice index")
    parser.add_argument(
        "--stream-text",
        action="store_true",
        help="Enable streaming text output",
    )
    parser.add_argument("--model", help="model name")

    args = parser.parse_args()

    if args.server_mode:
        server_mode(args)
        # Return an int on this path too, so every branch yields an exit status.
        return 0

    return 0 if initiate(args) else 1

# Script entry point.
# NOTE(review): the value returned by main() is discarded here, so the process
# exit status does not reflect it.
if __name__ == "__main__":
    main()