saudi-msa-piper / inference.py
ISTNetworks's picture
Fix inference.py - properly write WAV files with audio data
c11c178 verified
#!/usr/bin/env python3
"""
Standalone inference script for Saudi MSA Piper TTS model
Works on any computer with the ONNX model files
"""
import argparse
import sys
from pathlib import Path
try:
from piper import PiperVoice
except ImportError:
print("Error: piper-tts not installed")
print("Install with: pip install piper-tts")
sys.exit(1)
def synthesize_text(text, model_path, output_path, config_path=None):
    """
    Synthesize Arabic text to speech and write it to a WAV file.

    Progress and error messages are printed to stdout. The process is
    terminated with exit status 1 when the model file is missing, the model
    fails to load, or synthesis/writing fails.

    Args:
        text: Arabic text to synthesize
        model_path: Path to ONNX model file
        output_path: Path to save output WAV file
        config_path: Optional path to config JSON file. When omitted, the
            first existing file among ``<model>.onnx.json`` and
            ``config.json`` (next to the model) is used.
    """
    model_path = Path(model_path)

    # Check the model first so a bad path is reported before anything else
    # is derived from it.
    if not model_path.is_file():
        print(f"Error: Model file not found: {model_path}")
        sys.exit(1)

    # Auto-detect config file if not provided
    if config_path is None:
        candidates = (
            model_path.with_suffix('.onnx.json'),
            model_path.parent / 'config.json',
        )
        # None when nothing was found: PiperVoice.load is then left to apply
        # its own default config lookup and report the failure itself.
        config_path = next((c for c in candidates if c.is_file()), None)

    # Load voice model
    print(f"Loading model: {model_path}")
    try:
        # Forward the resolved config; previously it was computed but never
        # used, so -c/--config and the config.json fallback had no effect.
        voice = PiperVoice.load(
            str(model_path),
            config_path=None if config_path is None else str(config_path),
        )
    except Exception as e:
        print(f"Error loading model: {e}")
        sys.exit(1)

    # Synthesize
    print(f"Synthesizing: {text}")
    try:
        import wave

        # str() so that pathlib.Path outputs are opened by name rather than
        # being treated as a file-like object.
        with wave.open(str(output_path), 'wb') as wav_file:
            wav_file.setframerate(voice.config.sample_rate)
            wav_file.setsampwidth(2)  # 16-bit
            wav_file.setnchannels(1)  # Mono
            # Append each synthesized chunk's 16-bit PCM bytes in order.
            for audio_chunk in voice.synthesize(text):
                wav_file.writeframes(audio_chunk.audio_int16_bytes)
        print(f"✓ Audio saved to: {output_path}")
    except Exception as e:
        print(f"Error during synthesis: {e}")
        sys.exit(1)
def main():
    """
    Command-line entry point: parse arguments and synthesize the text.

    The text comes from -t/--text when that option is given; otherwise it is
    read from stdin. Blank text (empty or whitespace only) is rejected with
    an error message, the help text, and exit status 1.
    """
    parser = argparse.ArgumentParser(
        description='Saudi MSA Piper TTS - Text to Speech Inference',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
# Synthesize from command line
python3 inference.py -t "مرحبا بك" -o output.wav
# Synthesize from stdin
echo "مرحبا بك" | python3 inference.py -o output.wav
# Specify custom model path
python3 inference.py -t "مرحبا بك" -m /path/to/model.onnx -o output.wav
# Read from file
cat text.txt | python3 inference.py -o output.wav
"""
    )
    parser.add_argument(
        '-t', '--text',
        type=str,
        help='Arabic text to synthesize (if not provided, reads from stdin)'
    )
    parser.add_argument(
        '-m', '--model',
        type=str,
        default='saudi_msa_epoch455.onnx',
        help='Path to ONNX model file (default: saudi_msa_epoch455.onnx)'
    )
    parser.add_argument(
        '-o', '--output',
        type=str,
        required=True,
        help='Output WAV file path'
    )
    parser.add_argument(
        '-c', '--config',
        type=str,
        help='Path to config JSON file (auto-detected if not provided)'
    )
    args = parser.parse_args()

    # Get text from argument or stdin. Compare against None so that an
    # explicitly supplied empty -t "" is reported as missing text instead of
    # silently blocking on stdin.
    if args.text is not None:
        text = args.text
    else:
        print("Reading text from stdin...")
        text = sys.stdin.read().strip()

    # Whitespace-only input carries nothing to synthesize either.
    if not text.strip():
        print("Error: No text provided")
        parser.print_help()
        sys.exit(1)

    # Synthesize
    synthesize_text(text, args.model, args.output, args.config)
# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()