File size: 3,607 Bytes

#!/usr/bin/env python3
"""
Standalone inference script for Saudi MSA Piper TTS model
Works on any computer with the ONNX model files
"""

import argparse
import sys
from pathlib import Path

# piper-tts is the script's only third-party dependency. If it is missing,
# exit with an install hint instead of a raw ImportError traceback. The
# diagnostics go to stderr so they never mix with regular stdout output.
try:
    from piper import PiperVoice
except ImportError:
    print("Error: piper-tts not installed", file=sys.stderr)
    print("Install with: pip install piper-tts", file=sys.stderr)
    sys.exit(1)


def synthesize_text(text, model_path, output_path, config_path=None):
    """
    Synthesize Arabic text to speech and write it to a WAV file.

    Args:
        text: Arabic text to synthesize
        model_path: Path to ONNX model file
        output_path: Path to save output WAV file (16-bit mono)
        config_path: Optional path to config JSON file. When omitted, the
            first existing file among ``<model_path>.json``,
            ``<model stem>.onnx.json`` and ``config.json`` next to the model
            is used.

    Raises:
        SystemExit: with status 1 when the model or config file is missing,
            or when loading the model or synthesizing fails. The reason is
            printed to stderr.
    """
    import wave

    model_path = Path(model_path)

    # Check the model first: a directory or missing path is not a model.
    if not model_path.is_file():
        print(f"Error: Model file not found: {model_path}", file=sys.stderr)
        sys.exit(1)

    # Resolve the config file. An explicit path is taken as-is; otherwise
    # probe the conventional locations in order of preference.
    if config_path is not None:
        candidates = [Path(config_path)]
    else:
        candidates = [
            Path(f"{model_path}.json"),
            model_path.with_suffix('.onnx.json'),
            model_path.parent / 'config.json',
        ]
    config_path = next((c for c in candidates if c.is_file()), None)
    if config_path is None:
        tried = ", ".join(str(c) for c in candidates)
        print(f"Error: Config file not found (tried: {tried})", file=sys.stderr)
        sys.exit(1)

    # Load voice model, handing over the resolved config so that a custom
    # or fallback config is actually honoured.
    print(f"Loading model: {model_path}")
    try:
        voice = PiperVoice.load(str(model_path), config_path=str(config_path))
    except Exception as e:
        print(f"Error loading model: {e}", file=sys.stderr)
        sys.exit(1)

    # Synthesize
    print(f"Synthesizing: {text}")
    try:
        # str() so path-like objects are accepted as well as plain strings.
        with wave.open(str(output_path), 'wb') as wav_file:
            wav_file.setframerate(voice.config.sample_rate)
            wav_file.setsampwidth(2)  # 16-bit
            wav_file.setnchannels(1)  # Mono

            for audio_chunk in voice.synthesize(text):
                wav_file.writeframes(audio_chunk.audio_int16_bytes)
    except Exception as e:
        print(f"Error during synthesis: {e}", file=sys.stderr)
        sys.exit(1)

    # Reported outside the try block so that a failure to print (e.g. a
    # console encoding problem) is not misreported as a synthesis error.
    print(f"✓ Audio saved to: {output_path}")


def main():
    """
    Command-line entry point: parse arguments and synthesize the text.

    The text comes from ``-t/--text`` when that option is given (even if it
    is empty), otherwise from stdin. Surrounding whitespace is stripped in
    both cases. If no text remains, an error and the help text are printed
    to stderr and the process exits with status 1.
    """
    parser = argparse.ArgumentParser(
        description='Saudi MSA Piper TTS - Text to Speech Inference',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Synthesize from command line
  python3 inference.py -t "مرحبا بك" -o output.wav

  # Synthesize from stdin
  echo "مرحبا بك" | python3 inference.py -o output.wav

  # Specify custom model path
  python3 inference.py -t "مرحبا بك" -m /path/to/model.onnx -o output.wav

  # Read from file
  cat text.txt | python3 inference.py -o output.wav
        """
    )

    parser.add_argument(
        '-t', '--text',
        type=str,
        help='Arabic text to synthesize (if not provided, reads from stdin)'
    )

    parser.add_argument(
        '-m', '--model',
        type=str,
        default='saudi_msa_epoch455.onnx',
        help='Path to ONNX model file (default: saudi_msa_epoch455.onnx)'
    )

    parser.add_argument(
        '-o', '--output',
        type=str,
        required=True,
        help='Output WAV file path'
    )

    parser.add_argument(
        '-c', '--config',
        type=str,
        help='Path to config JSON file (auto-detected if not provided)'
    )

    args = parser.parse_args()

    # Get text from argument or stdin. Compare against None rather than
    # truthiness: an explicit `-t ""` must not fall through to a blocking
    # stdin read.
    if args.text is not None:
        text = args.text.strip()
    else:
        print("Reading text from stdin...")
        text = sys.stdin.read().strip()

    if not text:
        print("Error: No text provided", file=sys.stderr)
        parser.print_help(sys.stderr)
        sys.exit(1)

    # Synthesize
    synthesize_text(text, args.model, args.output, args.config)


# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()