"""Text-to-speech processor CLI with an optional stdin-driven server mode."""
# Silence noisy library warnings/logging before any heavy imports trigger them.
import warnings
warnings.filterwarnings("ignore", category=UserWarning)
warnings.filterwarnings("ignore", category=FutureWarning)
import logging
logging.getLogger().setLevel(logging.ERROR)
import argparse
import os
import sys
import time
# Singleton TTS engine instance, created lazily by initiate() and reused.
TTS_ENGINE = None
# Synchronous CUDA launches: errors surface at the failing call (debug aid).
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
def server_mode(args):
    """Serve synthesis requests read line-by-line from stdin, forever.

    Each line is split on the literal token "voice": the part before it is
    parsed as a float speed, the part after as an int voice index.  Malformed
    parts fall back to speed 1 / voice 8.  The value returned by initiate()
    is printed (and flushed) for each request.

    Stops cleanly when stdin is closed (EOF).
    """
    while True:
        line = sys.stdin.readline()
        if not line:
            # EOF: readline() returns "" once stdin closes; without this
            # check the loop would spin forever on empty strings.
            break
        parts = line.strip().split("voice")
        try:
            args.speed = float(parts[0])
        except (ValueError, IndexError):
            args.speed = 1  # malformed speed: normal playback rate
        try:
            args.voice = int(parts[1])
        except (ValueError, IndexError):
            args.voice = 8  # malformed/missing voice: default index
        output_path = initiate(args)
        print(output_path)
        sys.stdout.flush()
def current_env():
    """Return the basename of the active virtual environment.

    Raises:
        ValueError: if no virtual environment is active (VIRTUAL_ENV unset).
    """
    venv = os.environ.get("VIRTUAL_ENV")
    if not venv:
        raise ValueError("Please set env first")
    return os.path.basename(venv)
def _get_arg(args, name, default=None):
    """Read *name* from *args*, supporting both dicts and namespaces."""
    if isinstance(args, dict):
        return args.get(name, default)
    return getattr(args, name, default)


def _clear_cuda_cache():
    """Best-effort release of cached PyTorch CUDA memory; silent no-op when
    torch/CUDA is unavailable."""
    try:
        import gc
        import torch
        torch.cuda.empty_cache()
        torch.cuda.synchronize()
        gc.collect()
        time.sleep(1)  # give the driver a moment to settle before synthesis
        print("\n🧹 Cleared PyTorch CUDA cache")
    except Exception:
        pass  # deliberate best-effort: no torch / no CUDA means nothing to clear


def initiate(args):
    """Select a TTS engine, run synthesis, and return the result.

    Engine selection: the explicit ``model`` arg ("kokoro" / "kitten" /
    anything else -> chatterbox) when given, otherwise inferred from the
    active virtual environment name.  The engine instance is created once
    and cached in the module-level TTS_ENGINE for reuse across calls.

    Returns:
        The value of ``save_audio(args)`` in one-shot mode (presumably the
        output file path — server_mode prints it; TODO confirm against the
        engine implementations), or True after a streaming run.  Previously
        this function returned None, which made main() always exit 1 and
        server_mode print "None".
    """
    global TTS_ENGINE
    model = _get_arg(args, "model")
    if not model:
        # No explicit model: infer it from the active virtualenv name.
        model = {"kokoro_env": "kokoro", "kitten_env": "kitten"}.get(
            current_env(), "chatterbox"
        )
    if model == "kokoro":
        from .engines.kokoro import KokoroTTSProcessor as TTSEngine
    elif model == "kitten":
        from .engines.kitten import KittenTTSProcessor as TTSEngine
    else:
        from .engines.chatterbox import ChatterboxTTSProcessor as TTSEngine
    stream_text = _get_arg(args, "stream_text", False)
    if TTS_ENGINE is None:
        TTS_ENGINE = TTSEngine(stream_audio=stream_text)
    _clear_cuda_cache()
    if stream_text:
        TTS_ENGINE.stream_real_time_text(args)
        text = TTS_ENGINE.read_content_file()
        for text_chunk in text.split():
            TTS_ENGINE.feed_text_chunk(text_chunk)
            time.sleep(0.1)  # pace chunks slightly for the streaming consumer
        TTS_ENGINE.stop_all_streaming()
        return True
    return TTS_ENGINE.save_audio(args)
def main():
    """Main entry point: parse CLI arguments and dispatch.

    Returns a process exit code: 0 on success, 1 when one-shot synthesis
    reports failure.
    """
    parser = argparse.ArgumentParser(
        description="Text-to-Speech processor"
    )
    parser.add_argument(
        "--server-mode",
        action="store_true",
        help="Run in server mode (read commands from stdin)"
    )
    parser.add_argument(
        "--speed",
        type=float,
        help="Speech speed"
    )
    parser.add_argument(
        "--voice",
        type=int,
        help="Voice index"
    )
    parser.add_argument(
        "--stream-text",
        action="store_true",
        help="Enable streaming text output"
    )
    parser.add_argument(
        "--model",
        help="model name"
    )
    args = parser.parse_args()
    if args.server_mode:
        server_mode(args)
        return 0  # server loop ended cleanly (stdin closed)
    success = initiate(args)
    return 0 if success else 1
if __name__ == "__main__":
    # Propagate main()'s return value as the process exit code; previously
    # it was discarded, so the script always exited 0.
    sys.exit(main())
|