# Qwen3-TTS Smart Studio — Gradio demo app.
# (Removed stray "Spaces: Running" banner text pasted from the Hugging Face Spaces page.)
import gradio as gr
import torch
import soundfile as sf
from qwen_tts import Qwen3TTSModel
from langdetect import detect
import os

# Prefer GPU when available; otherwise run inference on CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
model_id = "Qwen/Qwen3-TTS-12Hz-1.7B-CustomVoice"

# Supported voices for this specific model
SUPPORTED_VOICES = [
    'aiden', 'dylan', 'eric', 'ono_anna',
    'ryan', 'serena', 'sohee', 'uncle_fu', 'vivian'
]

print(f"Loading Qwen3-TTS to {device}...")
# bfloat16 halves memory on GPU; CPU inference requires full float32.
# NOTE(review): model is loaded at import time — the app blocks until weights download.
model = Qwen3TTSModel.from_pretrained(
    model_id,
    device_map=device,
    torch_dtype=torch.bfloat16 if device == "cuda" else torch.float32
)
def smart_tts(text, voice, instructions, auto_detect):
    """Synthesize speech for *text* with the chosen speaker.

    Args:
        text: Input text to synthesize.
        voice: Speaker id; must be one of SUPPORTED_VOICES.
        instructions: Free-form style/emotion instruction passed to the model.
        auto_detect: When True, guess the text language with langdetect.

    Returns:
        Tuple of (audio_file_path, status_message) on success, or
        (None, error_message) on failure — shaped for the Gradio outputs.
    """
    try:
        # Guard clauses: fail fast with a readable message instead of a
        # model-side traceback.
        if not text or not text.strip():
            return None, "Error: Please enter some text to speak."
        if voice not in SUPPORTED_VOICES:
            return None, f"Error: Voice '{voice}' is not in the supported list."

        # Map langdetect's ISO-639-1 codes to the language names the model
        # expects. BUGFIX: langdetect reports Japanese as 'ja', not 'jp' —
        # with the old 'jp' key Japanese text always fell back to English.
        lang_map = {
            'zh': 'Chinese', 'en': 'English', 'ja': 'Japanese',
            'ko': 'Korean', 'de': 'German', 'fr': 'French',
            'ru': 'Russian', 'pt': 'Portuguese', 'es': 'Spanish', 'it': 'Italian'
        }

        detected_lang = "English"  # fallback when detection is off or fails
        if auto_detect:
            try:
                # detect() may return region-tagged codes such as 'zh-cn';
                # keep only the base language code.
                raw_lang = detect(text).split('-')[0]
                detected_lang = lang_map.get(raw_lang, "English")
            except Exception:
                # Detection can fail on very short/ambiguous input; this is a
                # deliberate best-effort — keep the English default.
                # (Was a bare `except:`, which also swallowed KeyboardInterrupt.)
                pass

        # Generate audio using the specific speaker ID.
        wavs, sr = model.generate_custom_voice(
            language=detected_lang,
            speaker=voice,
            instruct=instructions,
            text=text
        )

        output_path = "output.wav"
        sf.write(output_path, wavs[0], sr)
        return output_path, f"Language: {detected_lang} | Speaker: {voice}"
    except Exception as e:
        # Top-level boundary: surface any failure in the UI instead of crashing.
        return None, f"System Error: {str(e)}"
# ---------------- UI layout ----------------
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🗣️ Qwen3-TTS Smart Studio")
    gr.Markdown(f"Optimized for **{model_id}** on Hugging Face Free Tier.")

    with gr.Row():
        # Left column: all user inputs.
        with gr.Column():
            text_box = gr.Textbox(
                label="Text to Speak",
                placeholder="Enter text here...",
                lines=4,
            )
            with gr.Row():
                speaker_dd = gr.Dropdown(
                    choices=SUPPORTED_VOICES,
                    value="vivian",
                    label="Select Speaker",
                )
                detect_chk = gr.Checkbox(label="Auto-detect Language", value=True)
            instruct_box = gr.Textbox(
                label="Style/Emotion Instruction",
                placeholder="e.g. Speak with a professional tone, Whisper, or Excitedly",
                value="Speak naturally",
            )
            run_btn = gr.Button("Generate Audio", variant="primary")

        # Right column: synthesis result and status line.
        with gr.Column():
            audio_out = gr.Audio(label="Result", type="filepath")
            status_lbl = gr.Label(label="Metadata")

    # Wire the button to the synthesis function; argument order must match
    # smart_tts(text, voice, instructions, auto_detect).
    run_btn.click(
        fn=smart_tts,
        inputs=[text_box, speaker_dd, instruct_box, detect_chk],
        outputs=[audio_out, status_lbl],
    )

if __name__ == "__main__":
    demo.launch()