Spaces:

mnhatdaous
/

learnable-speech

Sleeping

App Files Files Community

learnable-speech / app.py

mnhatdaous

Fix Gradio app deployment issues

36fbe52 3 months ago

raw

history blame contribute delete

3.88 kB

	import gradio as gr
	import numpy as np
	import os

	def synthesize_speech(text, speaker_id=0):
	    """Return a placeholder tone standing in for synthesized speech.

	    Replace this with actual model inference when you have trained models.
	    The current output is a decaying two-harmonic sine tone with a little
	    Gaussian noise, faded in and out over at most 0.1 s at each end.

	    Args:
	        text: Text to "speak". ``None``, empty, or whitespace-only input
	            produces no audio. The clip lasts 0.08 s per character, with
	            a minimum of 1 s.
	        speaker_id: Numeric speaker index. The base frequency is
	            ``440 + speaker_id * 50`` Hz.

	    Returns:
	        ``None`` when there is no text; otherwise a ``(sample_rate, audio)``
	        tuple where ``sample_rate`` is 24000 and ``audio`` is a 1-D
	        ``float32`` NumPy array.
	    """
	    # Treat a missing value like empty text so it yields "no audio"
	    # instead of raising AttributeError on ``None.strip()``.
	    if text is None or not text.strip():
	        return None

	    sample_rate = 24000
	    duration = max(1.0, len(text) * 0.08)  # rough estimate
	    samples = int(sample_rate * duration)

	    # Generate sine-based waveform
	    t = np.linspace(0, duration, samples, endpoint=False)
	    frequency = 440 + (speaker_id * 50)

	    # Fundamental + second harmonic, each with its own exponential decay,
	    # plus low-level noise so the tone does not sound perfectly synthetic.
	    audio = (
	        0.3 * np.sin(2 * np.pi * frequency * t) * np.exp(-t / (duration * 0.8))
	        + 0.1 * np.sin(2 * np.pi * frequency * 2 * t) * np.exp(-t / duration)
	        + 0.05 * np.random.randn(samples)
	    )

	    # Fade in/out safely: never let the two ramps overlap on short clips.
	    fade_samples = min(int(0.1 * sample_rate), samples // 2)
	    if fade_samples > 0:
	        audio[:fade_samples] *= np.linspace(0, 1, fade_samples)
	        audio[-fade_samples:] *= np.linspace(1, 0, fade_samples)

	    return (sample_rate, audio.astype(np.float32))

	def create_demo():
	    """Assemble the Gradio Blocks UI for the placeholder synthesizer.

	    The page consists of an introductory banner, an input column (text box,
	    speaker slider, generate button), an output column (audio player) and a
	    set of clickable example sentences. Pressing the button feeds the text
	    and speaker id to ``synthesize_speech`` and shows its result in the
	    audio player.

	    Returns:
	        The constructed ``gr.Blocks`` object; launching it is left to the
	        caller.
	    """
	    page_css = """
	.gradio-container {
	max-width: 1200px !important;
	}
	"""
	    intro_markdown = """
	# 🎤 Learnable-Speech: High-Quality 24kHz Speech Synthesis

	An unofficial implementation based on improvements of CosyVoice with learnable encoder and DAC-VAE.

	> ⚠️ This is a demo interface with placeholder audio. To use the actual model, you need to train it first!
	"""
	    # One row per example; each row supplies a value for the text box only.
	    example_rows = [
	        ["Hello everyone! I am here to tell you that Learnable-Speech is amazing!"],
	        ["The Secret Service believed that it was very doubtful that any President would ride regularly in a vehicle."],
	        ["We propose Learnable-Speech, a new approach to neural text-to-speech synthesis."],
	        ["This implementation uses flow matching for high-quality 24kHz audio generation."],
	    ]

	    with gr.Blocks(
	        title="Learnable-Speech Demo",
	        theme=gr.themes.Default(),
	        css=page_css,
	    ) as demo:
	        gr.Markdown(intro_markdown)

	        with gr.Row():
	            # Left column: everything the user provides.
	            with gr.Column():
	                text_input = gr.Textbox(
	                    label="Text to synthesize",
	                    placeholder="Enter text here...",
	                    lines=3,
	                    value="Hello, this is a demo of Learnable-Speech synthesis.",
	                )
	                speaker_slider = gr.Slider(
	                    minimum=0, maximum=10, value=0, step=1, label="Speaker ID"
	                )
	                generate_btn = gr.Button("🎵 Generate Speech", variant="primary")

	            # Right column: the synthesized result.
	            with gr.Column():
	                audio_output = gr.Audio(label="Generated Speech", type="numpy")

	        # Wire the button to the synthesis function.
	        generate_btn.click(
	            fn=synthesize_speech,
	            inputs=[text_input, speaker_slider],
	            outputs=audio_output,
	        )

	        gr.Examples(examples=example_rows, inputs=[text_input])

	    return demo

	if __name__ == "__main__":
	    # Bind address and port can be overridden through the HOST and PORT
	    # environment variables; the defaults are 0.0.0.0 and 7860.
	    port = int(os.environ.get("PORT", 7860))
	    host = os.environ.get("HOST", "0.0.0.0")

	    demo = create_demo()

	    # Enable request queuing through queue(). The former
	    # ``launch(enable_queue=True)`` keyword was deprecated in Gradio 3 and
	    # removed in Gradio 4, where passing it makes launch() raise TypeError
	    # (in both the primary call and the fallback below).
	    demo.queue()

	    try:
	        demo.launch(
	            server_name=host,
	            server_port=port,
	            share=False,
	            show_error=True,
	            quiet=False,
	        )
	    except Exception as exc:
	        # Top-level boundary: report why the local launch failed before
	        # retrying with a public share link, so the cause is not hidden.
	        print(f"Failed to launch on {host}:{port}, trying with share=True")
	        print(f"Launch error: {exc!r}")
	        demo.launch(
	            share=True,
	            show_error=True,
	            quiet=False,
	        )