"""Hugging Face Spaces entry point for the Qwen3-TTS Gradio demo.

Builds a synthetic ``sys.argv`` and delegates to the ``qwen-tts-demo``
CLI. Device, dtype, and flash-attn usage are chosen automatically from
CUDA availability (free HF Spaces run on CPU; GPU Spaces have CUDA).
"""

import sys

import torch

from qwen_tts.cli.demo import main

if __name__ == '__main__':
    # Probe CUDA once so device, dtype, and flash-attn decisions
    # cannot drift apart.
    cuda_available = torch.cuda.is_available()

    # By default, free HF Spaces run on CPU. GPU spaces have CUDA.
    device = "cuda:0" if cuda_available else "cpu"
    dtype = "float16" if cuda_available else "float32"

    # You can change the model ID here based on what you want to deploy:
    #   "Qwen/Qwen3-TTS-12Hz-1.7B-CustomVoice"
    #   "Qwen/Qwen3-TTS-12Hz-1.7B-VoiceDesign"
    #   "Qwen/Qwen3-TTS-12Hz-1.7B-Base"
    model_id = "Qwen/Qwen3-TTS-12Hz-1.7B-CustomVoice"

    # Construct the arguments expected by the qwen-tts-demo CLI.
    sys.argv = [
        "qwen-tts-demo",
        model_id,
        "--device", device,
        "--dtype", dtype,
        "--ip", "0.0.0.0",
        "--port", "7860",  # Standard Hugging Face Spaces Gradio port
    ]

    # flash-attn is only available/supported if we have CUDA.
    if not cuda_available:
        sys.argv.append("--no-flash-attn")

    sys.exit(main())