#!/usr/bin/env python3
"""
Standalone inference script for Saudi MSA Piper TTS model
Works on any computer with the ONNX model files
"""
import argparse
import sys
from pathlib import Path
# piper-tts is the only third-party dependency; fail early with an install
# hint instead of a raw traceback when it is missing.
try:
    from piper import PiperVoice
except ImportError:
    # Diagnostics belong on stderr so they are not mixed into piped stdout.
    print("Error: piper-tts not installed", file=sys.stderr)
    print("Install with: pip install piper-tts", file=sys.stderr)
    sys.exit(1)
def synthesize_text(text, model_path, output_path, config_path=None):
    """
    Synthesize Arabic text to speech and write it to a WAV file.

    Args:
        text: Arabic text to synthesize
        model_path: Path to ONNX model file
        output_path: Path to save output WAV file (written as 16-bit mono)
        config_path: Optional path to config JSON file. When omitted,
            ``<model_path>.json`` is tried first, then ``config.json`` in
            the model's directory.

    Exits the process with status 1, after printing a message to stderr,
    when the model or config file is missing or when loading the model or
    synthesizing fails.
    """
    import wave

    model_path = Path(model_path)

    # Check if model exists before doing any other work
    if not model_path.exists():
        print(f"Error: Model file not found: {model_path}", file=sys.stderr)
        sys.exit(1)

    # Auto-detect config file if not provided
    if config_path is None:
        # Append ".json" to the full file name (model.onnx -> model.onnx.json)
        # so models whose name does not end in ".onnx" resolve correctly too.
        config_path = model_path.with_name(model_path.name + '.json')
        if not config_path.exists():
            config_path = model_path.parent / 'config.json'
    else:
        config_path = Path(config_path)

    if not config_path.exists():
        print(f"Error: Config file not found: {config_path}", file=sys.stderr)
        sys.exit(1)

    # Load voice model, handing over the resolved config explicitly so that
    # a user-supplied or fallback config is actually honored.
    print(f"Loading model: {model_path}")
    try:
        voice = PiperVoice.load(str(model_path), config_path=str(config_path))
    except Exception as e:
        print(f"Error loading model: {e}", file=sys.stderr)
        sys.exit(1)

    # Synthesize
    print(f"Synthesizing: {text}")
    try:
        # wave.open() only treats str as a file name, so normalise Path input.
        with wave.open(str(output_path), 'wb') as wav_file:
            wav_file.setframerate(voice.config.sample_rate)
            wav_file.setsampwidth(2)  # 16-bit
            wav_file.setnchannels(1)  # Mono
            for audio_chunk in voice.synthesize(text):
                wav_file.writeframes(audio_chunk.audio_int16_bytes)
    except Exception as e:
        print(f"Error during synthesis: {e}", file=sys.stderr)
        sys.exit(1)
    else:
        # Reported outside the try body so a failure to print is not
        # mislabelled as a synthesis error.
        print(f"✓ Audio saved to: {output_path}")
def main(argv=None):
    """
    Command-line entry point: parse arguments and run synthesis.

    Args:
        argv: Optional list of argument strings. When None, argparse
            reads the arguments from sys.argv.

    The text comes from -t/--text when that option is given, otherwise
    from stdin. Exits with status 1 when the resulting text is empty.
    """
    parser = argparse.ArgumentParser(
        description='Saudi MSA Piper TTS - Text to Speech Inference',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
# Synthesize from command line
python3 inference.py -t "مرحبا بك" -o output.wav
# Synthesize from stdin
echo "مرحبا بك" | python3 inference.py -o output.wav
# Specify custom model path
python3 inference.py -t "مرحبا بك" -m /path/to/model.onnx -o output.wav
# Read from file
cat text.txt | python3 inference.py -o output.wav
"""
    )
    parser.add_argument(
        '-t', '--text',
        type=str,
        help='Arabic text to synthesize (if not provided, reads from stdin)'
    )
    parser.add_argument(
        '-m', '--model',
        type=str,
        default='saudi_msa_epoch455.onnx',
        help='Path to ONNX model file (default: saudi_msa_epoch455.onnx)'
    )
    parser.add_argument(
        '-o', '--output',
        type=str,
        required=True,
        help='Output WAV file path'
    )
    parser.add_argument(
        '-c', '--config',
        type=str,
        help='Path to config JSON file (auto-detected if not provided)'
    )
    args = parser.parse_args(argv)

    # Get text from argument or stdin. Compare against None rather than
    # truthiness so an explicit empty -t "" is rejected below instead of
    # silently blocking on stdin.
    if args.text is not None:
        text = args.text.strip()
    else:
        print("Reading text from stdin...")
        text = sys.stdin.read().strip()

    if not text:
        print("Error: No text provided", file=sys.stderr)
        parser.print_help(sys.stderr)
        sys.exit(1)

    # Synthesize
    synthesize_text(text, args.model, args.output, args.config)
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
|