"""Hugging Face Spaces entry point for the Qwen3-TTS Gradio demo.

Builds a synthetic ``sys.argv`` and delegates to the ``qwen-tts-demo``
CLI. Device, dtype, and flash-attn usage are chosen automatically from
CUDA availability (free HF Spaces run on CPU; GPU Spaces have CUDA).
"""

import sys

import torch

from qwen_tts.cli.demo import main

if __name__ == '__main__':
    # Probe CUDA once so device, dtype, and flash-attn decisions
    # cannot drift apart.
    cuda_available = torch.cuda.is_available()

    # By default, free HF Spaces run on CPU. GPU spaces have CUDA.
    device = "cuda:0" if cuda_available else "cpu"
    dtype = "float16" if cuda_available else "float32"

    # You can change the model ID here based on what you want to deploy:
    #   "Qwen/Qwen3-TTS-12Hz-1.7B-CustomVoice"
    #   "Qwen/Qwen3-TTS-12Hz-1.7B-VoiceDesign"
    #   "Qwen/Qwen3-TTS-12Hz-1.7B-Base"
    model_id = "Qwen/Qwen3-TTS-12Hz-1.7B-CustomVoice"

    # Construct the arguments expected by the qwen-tts-demo CLI.
    sys.argv = [
        "qwen-tts-demo",
        model_id,
        "--device", device,
        "--dtype", dtype,
        "--ip", "0.0.0.0",
        "--port", "7860",  # Standard Hugging Face Spaces Gradio port
    ]

    # flash-attn is only available/supported if we have CUDA.
    if not cuda_available:
        sys.argv.append("--no-flash-attn")

    sys.exit(main())