import gradio as gr

from models.tts import TTSModel
from utils.audio_utils import save_audio, get_cached_audio, get_audio_filename
from utils.input_validation import validate_input
from config.language_mapping import (
    LANGUAGE_VOICE_MAPPING,
    construct_description,
    EMOTION_DESC,
    SPEED_DESC,
    PITCH_DESC,
    BACKGROUND_NOISE_DESC,
    REVERBERATION_DESC,
    QUALITY_DESC,
    get_speakers_for_language,
)

def generate_speech(
    text,
    language,
    speaker,
    emotion="Neutral",
    speed="Normal",
    pitch="Medium",
    background_noise="Minimal",
    reverberation="Close",
    quality="High",
):
    try:
        # Validate inputs
        validate_input(text, language)

        # Check if audio is already cached
        cached_audio = get_cached_audio(
            text, language, speaker, emotion, speed,
            pitch, background_noise, reverberation, quality
        )
        if cached_audio:
            return cached_audio

        # Get the description using the imported constructor
        description = construct_description(
            speaker,
            language,
            emotion,
            speed,
            pitch,
            background_noise,
            reverberation,
            quality,
        )

        # Generate audio
        tts_model = TTSModel()
        audio_array = tts_model.generate_audio(text, description)

        # Save the generated audio
        filename = get_audio_filename(
            text, language, speaker, emotion, speed,
            pitch, background_noise, reverberation, quality
        )
        filepath = save_audio(audio_array, filename)
        return filepath

    except Exception as e:
        raise gr.Error(str(e))

# Create Gradio interface
with gr.Blocks(title="Indic Text-to-Speech") as demo:
    gr.Markdown("# Indian Local Text-to-Speech Synthesizer")
    gr.Markdown("Generate natural speech in multiple Indian languages using AI4Bharat's model")

    with gr.Row():
        with gr.Column():
            text_input = gr.Textbox(
                label="Text to speak",
                placeholder="Enter the text you want to convert to speech...",
                lines=3
            )

            with gr.Row():
                language_input = gr.Dropdown(
                    choices=sorted(LANGUAGE_VOICE_MAPPING.keys()),
                    label="Language",
                    value="English"
                )
                speaker_input = gr.Dropdown(
                    choices=LANGUAGE_VOICE_MAPPING["English"],  # Default choices
                    label="Speaker",
                    value=LANGUAGE_VOICE_MAPPING["English"][0]  # Default value
                )

            with gr.Row():
                emotion_input = gr.Dropdown(
                    choices=list(EMOTION_DESC.keys()),
                    label="Expressivity",
                    value="Neutral"
                )
                speed_input = gr.Dropdown(
                    choices=list(SPEED_DESC.keys()),
                    label="Speaking Speed",
                    value="Normal"
                )

            with gr.Row():
                pitch_input = gr.Dropdown(
                    choices=list(PITCH_DESC.keys()),
                    label="Pitch",
                    value="Medium"
                )
                background_input = gr.Dropdown(
                    choices=list(BACKGROUND_NOISE_DESC.keys()),
                    label="Background Noise",
                    value="Minimal"
                )

            with gr.Row():
                reverb_input = gr.Dropdown(
                    choices=list(REVERBERATION_DESC.keys()),
                    label="Reverberation",
                    value="Close"
                )
                quality_input = gr.Dropdown(
                    choices=list(QUALITY_DESC.keys()),
                    label="Audio Quality",
                    value="High"
                )

            generate_btn = gr.Button("Generate Speech", variant="primary")

        with gr.Column():
            audio_output = gr.Audio(
                label="Generated Speech",
                type="filepath"  # generate_speech returns a file path, not a numpy array
            )

    # Update speaker choices when language changes
    def update_speakers(language):
        speakers = get_speakers_for_language(language)
        return gr.Dropdown(choices=speakers, value=speakers[0])

    language_input.change(
        fn=update_speakers,
        inputs=[language_input],
        outputs=[speaker_input]
    )

    # Connect the components
    generate_btn.click(
        fn=generate_speech,
        inputs=[
            text_input,
            language_input,
            speaker_input,
            emotion_input,
            speed_input,
            pitch_input,
            background_input,
            reverb_input,
            quality_input
        ],
        outputs=audio_output
    )

    # Example inputs. cache_examples=True below already runs generate_speech on
    # each row once at startup and stores the results, so no separate
    # pre-generation pass is needed.
    examples = [
        ["Hello, how are you?", "English", "Thoma", "Happy", "Normal", "Medium", "Minimal", "Close", "High"],
        ["नमस्ते, आप कैसे हैं?", "Hindi", "Rohit", "Neutral", "Normal", "Medium", "None", "Very Close", "Studio"],
        ["ನಮಸ್ಕಾರ, ಹೇಗಿದ್ದೀರಾ?", "Kannada", "Suresh", "Highly Expressive", "Fast", "High", "Minimal", "Moderate", "High"],
        ["How are you doing today?", "English", "Mary", "Monotone", "Slow", "Low", "Moderate", "Distant", "Good"],
    ]

    # Add examples with cached outputs
    gr.Examples(
        examples=examples,
        inputs=[
            text_input,
            language_input,
            speaker_input,
            emotion_input,
            speed_input,
            pitch_input,
            background_input,
            reverb_input,
            quality_input
        ],
        outputs=audio_output,
        fn=generate_speech,
        cache_examples=True,
        preprocess=False,   # Don't preprocess inputs
        postprocess=False   # Don't postprocess outputs
    )

if __name__ == "__main__":
    demo.launch()
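
# ---------------------------------------------------------------------------
# The local modules imported at the top (models/tts.py, utils/audio_utils.py,
# utils/input_validation.py, config/language_mapping.py) are not shown in this
# file. The sketches below are assumptions about how the two pieces that
# generate_speech() leans on most might look -- the TTS wrapper and the audio
# cache -- assuming the Parler-TTS checkpoint "ai4bharat/indic-parler-tts"
# (suggested by the "AI4Bharat's model" blurb above). Names such as CACHE_DIR
# are hypothetical; none of this is the Space's actual implementation.
# ---------------------------------------------------------------------------

# models/tts.py -- hedged sketch of the TTSModel wrapper
import torch
from parler_tts import ParlerTTSForConditionalGeneration
from transformers import AutoTokenizer


class TTSModel:
    MODEL_ID = "ai4bharat/indic-parler-tts"  # assumed checkpoint

    def __init__(self):
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.model = ParlerTTSForConditionalGeneration.from_pretrained(
            self.MODEL_ID
        ).to(self.device)
        # Parler-TTS conditions on two text streams: the prompt (what to say)
        # and the description (how to say it), each with its own tokenizer.
        self.tokenizer = AutoTokenizer.from_pretrained(self.MODEL_ID)
        self.description_tokenizer = AutoTokenizer.from_pretrained(
            self.model.config.text_encoder._name_or_path
        )
        self.sampling_rate = self.model.config.sampling_rate

    def generate_audio(self, text, description):
        desc = self.description_tokenizer(description, return_tensors="pt").to(self.device)
        prompt = self.tokenizer(text, return_tensors="pt").to(self.device)
        generation = self.model.generate(
            input_ids=desc.input_ids,
            attention_mask=desc.attention_mask,
            prompt_input_ids=prompt.input_ids,
            prompt_attention_mask=prompt.attention_mask,
        )
        # 1-D float waveform sampled at self.sampling_rate
        return generation.cpu().numpy().squeeze()


# utils/audio_utils.py -- hedged sketch of the file cache behind
# get_cached_audio/get_audio_filename/save_audio: the filename is a hash of
# every parameter that affects the output, so identical requests reuse a file.
import hashlib
import os

import soundfile as sf

CACHE_DIR = "audio_cache"  # hypothetical location


def get_audio_filename(*params):
    key = "|".join(str(p) for p in params)
    return hashlib.md5(key.encode("utf-8")).hexdigest() + ".wav"


def get_cached_audio(*params):
    path = os.path.join(CACHE_DIR, get_audio_filename(*params))
    return path if os.path.exists(path) else None


def save_audio(audio_array, filename, sampling_rate=44100):
    # Parler-TTS decodes at 44.1 kHz; the real helper presumably reads the
    # rate from the model config instead of hard-coding it.
    os.makedirs(CACHE_DIR, exist_ok=True)
    path = os.path.join(CACHE_DIR, filename)
    sf.write(path, audio_array, sampling_rate)
    return path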