File size: 3,383 Bytes
19933fe
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2886be7
 
 
19933fe
 
 
 
 
 
 
 
 
2886be7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19933fe
 
 
2886be7
19933fe
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2886be7
19933fe
 
 
 
 
 
1c7725b
 
 
 
 
 
 
19933fe
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
"""CLI entry point for the Reader server."""

from __future__ import annotations

import argparse
import sys


def _build_parser() -> argparse.ArgumentParser:
    """Build the argument parser for the Reader CLI."""
    parser = argparse.ArgumentParser(
        prog="reader",
        description="PDF-to-Speech web server - listen to any content",
    )
    parser.add_argument(
        "--voice",
        type=str,
        default=None,
        help="Voice name for TTS. Options: Vivian, Serena, Uncle_Fu, Dylan, Eric, "
        "Ryan, Aiden, Ono_Anna, Sohee (default: auto based on language)",
    )
    parser.add_argument(
        "--language",
        type=str,
        default="english",
        choices=["english", "chinese", "japanese", "korean"],
        help="Language for TTS (default: english). Sets default voice if --voice not specified.",
    )
    parser.add_argument(
        "--host",
        type=str,
        default="0.0.0.0",
        help="Host to bind the server to (default: 0.0.0.0)",
    )
    parser.add_argument(
        "--port",
        type=int,
        default=8000,
        help="Port to bind the server to (default: 8000)",
    )
    parser.add_argument(
        "--device",
        type=str,
        default="auto",
        choices=["auto", "cuda", "cpu"],
        help="Device to run the TTS model on (default: auto, detects GPU)",
    )
    parser.add_argument(
        "--reload",
        action="store_true",
        help="Enable auto-reload for development",
    )
    return parser


def _resolve_device(requested: str) -> str:
    """Turn the ``--device`` choice into a concrete device name.

    An explicit choice is returned unchanged. ``"auto"`` becomes ``"cuda"``
    when PyTorch can be imported and reports CUDA as available, and ``"cpu"``
    otherwise (including when PyTorch is not installed).
    """
    if requested != "auto":
        return requested

    try:
        import torch

        has_cuda = torch.cuda.is_available()
    except ImportError:
        print("💻 PyTorch not available for detection, using CPU")
        return "cpu"

    if has_cuda:
        print("🎮 GPU detected, using CUDA")
        return "cuda"

    print("💻 No GPU detected, using CPU (slower but works!)")
    return "cpu"


def main(argv: list[str] | None = None) -> int:
    """Main entry point for the Reader CLI.

    Parses the command line, loads and calibrates the TTS engine, builds the
    web app around it and serves it with uvicorn.

    Args:
        argv: Command-line arguments without the program name. ``None`` (the
            default) makes argparse read ``sys.argv[1:]``.

    Returns:
        Exit code: 0 after ``uvicorn.run`` returns, 1 if the TTS model could
        not be loaded.

    Raises:
        SystemExit: Raised by argparse for ``--help`` and invalid arguments.
    """
    args = _build_parser().parse_args(argv)

    if args.reload:
        # uvicorn's reload mode needs the app as an import string, but this
        # CLI hands it an app instance built around an already-loaded engine.
        # Say so explicitly instead of silently dropping the flag.
        print(
            "⚠️  --reload is not supported by this entry point and will be ignored",
            file=sys.stderr,
        )

    device = _resolve_device(args.device)

    print("🚀 Starting Reader server...")
    print(f"   Language: {args.language}")
    print(f"   Voice:    {args.voice or 'auto'}")
    print(f"   Device:   {device}")
    print(f"   URL:      http://{args.host}:{args.port}")
    print()

    # Import here to avoid slow startup for --help
    import uvicorn

    from talking_snake.app import create_app
    from talking_snake.tts import QwenTTSEngine

    # Initialize TTS engine
    print("📦 Loading TTS model (this may take a moment)...")
    try:
        tts_engine = QwenTTSEngine(
            voice=args.voice,
            language=args.language,
            device=device,
        )
    except Exception as e:
        # Top-level boundary: report any load failure and exit non-zero.
        print(f"❌ Failed to load TTS model: {e}", file=sys.stderr)
        return 1

    print("✅ TTS model loaded!")

    # Run calibration to get accurate time estimates
    print("⏱️  Calibrating speech timing...")
    try:
        tts_engine.calibrate()
    except Exception as e:
        # Calibration is best-effort; a failure must not stop the server.
        print(f"⚠️  Calibration failed (using defaults): {e}")
    print()

    # Create app with engine
    app = create_app(tts_engine=tts_engine)

    # Run server
    uvicorn.run(
        app,
        host=args.host,
        port=args.port,
        log_level="info",
    )

    return 0


# Script entry point: use main()'s return value as the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())