File size: 3,607 Bytes
2ae7eb5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c11c178
 
 
 
 
 
 
 
 
 
2ae7eb5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
#!/usr/bin/env python3
"""
Standalone inference script for Saudi MSA Piper TTS model
Works on any computer with the ONNX model files
"""

import argparse
import sys
from pathlib import Path

# piper-tts is the one third-party dependency of this script. If it is
# missing, print an install hint and exit with status 1 instead of showing a
# raw ImportError traceback.
try:
    from piper import PiperVoice
except ImportError:
    print("Error: piper-tts not installed")
    print("Install with: pip install piper-tts")
    sys.exit(1)


def synthesize_text(text, model_path, output_path, config_path=None):
    """
    Synthesize Arabic text to speech and save it as a WAV file.

    Progress and error messages are printed to stdout. On any failure
    (model file missing, model cannot be loaded, synthesis or file writing
    fails) the process exits with status 1.

    Args:
        text: Arabic text to synthesize
        model_path: Path to ONNX model file
        output_path: Path to save output WAV file (str or path-like)
        config_path: Optional path to config JSON file. When omitted,
            ``<model>.onnx.json`` is tried first, then ``config.json`` in
            the model's directory; if neither exists the choice is left to
            Piper's own default lookup.
    """
    import wave

    model_path = Path(model_path)

    # Validate the model first: a directory or an empty path is not a usable
    # model, and checking here keeps with_suffix() below from raising on a
    # path that has no file name.
    if not model_path.is_file():
        print(f"Error: Model file not found: {model_path}")
        sys.exit(1)

    # Auto-detect config file if not provided
    if config_path is None:
        candidates = (
            model_path.with_suffix('.onnx.json'),
            model_path.parent / 'config.json',
        )
        config_path = next((c for c in candidates if c.exists()), None)

    # Load voice model, handing over the resolved config so that an explicit
    # or auto-detected config file is actually honoured.
    print(f"Loading model: {model_path}")
    try:
        voice = PiperVoice.load(
            str(model_path),
            config_path=None if config_path is None else str(config_path),
        )
    except Exception as e:
        print(f"Error loading model: {e}")
        sys.exit(1)

    # Synthesize
    print(f"Synthesizing: {text}")
    try:
        # str() so that path-like objects are accepted as well as strings.
        with wave.open(str(output_path), 'wb') as wav_file:
            wav_file.setframerate(voice.config.sample_rate)
            wav_file.setsampwidth(2)  # 16-bit
            wav_file.setnchannels(1)  # Mono

            for audio_chunk in voice.synthesize(text):
                wav_file.writeframes(audio_chunk.audio_int16_bytes)

        print(f"✓ Audio saved to: {output_path}")
    except Exception as e:
        print(f"Error during synthesis: {e}")
        sys.exit(1)


def main():
    """
    Command-line entry point.

    Parses the command-line options, takes the text from ``--text`` or, when
    that is absent or empty, from stdin, and passes it to synthesize_text().
    Prints the help text and exits with status 1 if no text was obtained.
    """
    usage_examples = """
Examples:
  # Synthesize from command line
  python3 inference.py -t "مرحبا بك" -o output.wav
  
  # Synthesize from stdin
  echo "مرحبا بك" | python3 inference.py -o output.wav
  
  # Specify custom model path
  python3 inference.py -t "مرحبا بك" -m /path/to/model.onnx -o output.wav
  
  # Read from file
  cat text.txt | python3 inference.py -o output.wav
        """

    cli = argparse.ArgumentParser(
        description='Saudi MSA Piper TTS - Text to Speech Inference',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=usage_examples,
    )
    cli.add_argument(
        '-t', '--text', type=str,
        help='Arabic text to synthesize (if not provided, reads from stdin)',
    )
    cli.add_argument(
        '-m', '--model', type=str, default='saudi_msa_epoch455.onnx',
        help='Path to ONNX model file (default: saudi_msa_epoch455.onnx)',
    )
    cli.add_argument(
        '-o', '--output', type=str, required=True,
        help='Output WAV file path',
    )
    cli.add_argument(
        '-c', '--config', type=str,
        help='Path to config JSON file (auto-detected if not provided)',
    )
    options = cli.parse_args()

    # An absent or empty --text means the text comes from stdin.
    text = options.text
    if not text:
        print("Reading text from stdin...")
        text = sys.stdin.read().strip()

    if text:
        synthesize_text(text, options.model, options.output, options.config)
        return

    # Nothing to synthesize: explain the usage and signal failure.
    print("Error: No text provided")
    cli.print_help()
    sys.exit(1)


# Run the CLI only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == '__main__':
    main()