Spaces:

BosonLab
/

chatterbox-desi

Sleeping

App Files Files Community

chatterbox-desi / app.py

arijitx

Upload folder using huggingface_hub

68268d5 verified 3 days ago

raw

history blame contribute delete

6.14 kB

	"""
	ChatterBox Desi TTS Space
	Fine-tuned multi-language Indic TTS model for text-to-speech synthesis
	Supports: Bengali, Hindi, Marathi, Gujarati, Tamil, Telugu
	"""

	import sys
	import os
	import gradio as gr
	import torch
	from huggingface_hub import snapshot_download
	import torchaudio

	# Add chatterbox-finetuning to path.
	# The directory is expected to sit next to this file; it is inserted at the
	# front of sys.path so it is searched first when the `src.chatterbox_`
	# import below runs. This must therefore happen before that import.
	chatterbox_finetuning_path = os.path.join(os.path.dirname(__file__), "chatterbox-finetuning")
	sys.path.insert(0, chatterbox_finetuning_path)

	from src.chatterbox_.tts import ChatterboxTTS

	# Model configuration: Hub repo to download and the torch device to run on.
	MODEL_ID = "BosonLab/chatterbox-desi"
	DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

	# Language configuration.
	# Maps the label shown in the UI to:
	#   code    - short language code
	#   tag     - bracketed marker prepended to the text before synthesis
	#   example - sample sentence offered by the "Load Example Text" button
	# Insertion order is the order the languages appear in the dropdown.
	LANGUAGES = {
	    "Bengali (বাংলা)": dict(
	        code="bn",
	        tag="[bn]",
	        example="আমি বাংলায় কথা বলতে পারি। এটি একটি পরীক্ষামূলক বাক্য।",
	    ),
	    "Hindi (हिंदी)": dict(
	        code="hi",
	        tag="[hi]",
	        example="मैं हिंदी में बोल सकता हूँ। यह एक परीक्षण वाक्य है।",
	    ),
	    "Marathi (मराठी)": dict(
	        code="mr",
	        tag="[mr]",
	        example="मी मराठीत बोलू शकतो. हे एक चाचणी वाक्य आहे.",
	    ),
	    "Gujarati (ગુજરાતી)": dict(
	        code="gu",
	        tag="[gu]",
	        example="હું ગુજરાતીમાં બોલી શકું છું. આ એક પ્રાયોગિક વાક્ય છે.",
	    ),
	    "Tamil (தமிழ்)": dict(
	        code="ta",
	        tag="[ta]",
	        example="நான் தமிழில் பேச முடியும். இது ஒரு சோதனை வாக்கியம்.",
	    ),
	    "Telugu (తెలుగు)": dict(
	        code="te",
	        tag="[te]",
	        example="నేను తెలుగులో మాట్లాడగలను. ఇది ఒక పరీక్ష వాక్యం.",
	    ),
	}

	# Load model at import time: fetch the repository snapshot, then build the
	# TTS model from the local copy on the selected device.
	print(f"Loading model from {MODEL_ID}...")
	print(f"Using device: {DEVICE}")

	try:
	    model_dir = snapshot_download(MODEL_ID)
	    print(f"Model downloaded to: {model_dir}")
	    model = ChatterboxTTS.from_local(model_dir, device=DEVICE)
	    print("Model loaded successfully!")
	except Exception as load_error:
	    # Log the failure, then re-raise so startup stops instead of running
	    # without a model.
	    print(f"Error loading model: {load_error}")
	    raise


	def get_example_text(language):
	    """Look up the sample sentence for a language label.

	    Labels that are not keys of LANGUAGES fall back to the Bengali entry.
	    """
	    fallback_entry = LANGUAGES["Bengali (বাংলা)"]
	    return LANGUAGES.get(language, fallback_entry)["example"]


	def generate_speech(text, language, reference_audio=None):
	    """
	    Generate speech from text using the fine-tuned model.

	    Args:
	        text: Input text to convert to speech. Blank or missing text is
	            rejected with an error message instead of being synthesized.
	        language: Selected language name (a key of LANGUAGES). Unknown
	            names fall back to the Bengali entry.
	        reference_audio: Optional reference audio path for voice cloning

	    Returns:
	        Tuple of (audio_data, metadata_str). On success audio_data is
	        (sample_rate, waveform as a NumPy array). On failure audio_data is
	        None and metadata_str is an "Error: ..." message.
	    """
	    # Reject blank/None input up front: otherwise the model would be asked
	    # to synthesize a bare language tag (or None would surface as an
	    # unhelpful AttributeError message).
	    cleaned_text = text.strip() if isinstance(text, str) else ""
	    if not cleaned_text:
	        return None, "Error: Please enter some text to synthesize."

	    try:
	        lang_info = LANGUAGES.get(language, LANGUAGES["Bengali (বাংলা)"])
	        lang_tag = lang_info["tag"]

	        # Prepend language tag if not already present. Any text starting
	        # with "[" is treated as already tagged and passed through as-is.
	        if cleaned_text.startswith("["):
	            tagged_text = cleaned_text
	        else:
	            tagged_text = f"{lang_tag} {cleaned_text}"

	        # Generate speech with optional voice cloning
	        if reference_audio is not None:
	            wav = model.generate(tagged_text, audio_prompt_path=reference_audio)
	        else:
	            wav = model.generate(tagged_text)

	        # Tensor.numpy() only works for CPU tensors that do not require
	        # grad; detach and move to CPU so this also works when the model
	        # runs on CUDA.
	        waveform = wav.squeeze(0).detach().cpu().numpy()

	        metadata = (
	            f"Language: {language}\n"
	            f"Tag: {lang_tag}\n"
	            f"Input: {text[:100]}{'...' if len(text) > 100 else ''}\n"
	            f"Characters: {len(text)}"
	        )
	        return (model.sr, waveform), metadata

	    except Exception as e:
	        # UI boundary: report the failure in the Info box rather than
	        # crashing the request.
	        print(f"Error generating speech: {e}")
	        return None, f"Error: {str(e)}"


	# Create Gradio interface: inputs in the left column, results in the right,
	# followed by a strip of per-language sample sentences.
	with gr.Blocks(title="ChatterBox Desi TTS", theme=gr.themes.Soft()) as demo:
	    # Page header. The text lines stay at the file's base indent so the
	    # string literal itself is unchanged.
	    gr.Markdown(
	        """
	# ChatterBox Desi TTS
	Multi-language Indic text-to-speech powered by [BosonLab/chatterbox-desi](https://huggingface.co/BosonLab/chatterbox-desi).
	Supports Bengali, Hindi, Marathi, Gujarati, Tamil, and Telugu.

	> Note: The model automatically adds the language tag. Just select your language and type your text.
	"""
	    )

	    with gr.Row():
	        # Left column: everything the user provides.
	        with gr.Column(scale=1):
	            language_dropdown = gr.Dropdown(
	                label="Language",
	                choices=list(LANGUAGES),
	                value="Bengali (বাংলা)",
	            )

	            text_input = gr.Textbox(
	                label="Text",
	                placeholder="Enter text in your selected language...",
	                lines=5,
	                max_lines=10,
	            )

	            example_btn = gr.Button("Load Example Text", variant="secondary")

	            # type="filepath" hands generate_speech a path string.
	            reference_audio = gr.Audio(
	                label="Reference Audio (Optional — for voice cloning)",
	                type="filepath",
	            )

	            generate_btn = gr.Button("Generate Speech", variant="primary")

	        # Right column: synthesized audio plus the info/error text.
	        with gr.Column(scale=1):
	            audio_output = gr.Audio(label="Generated Speech")
	            metadata_output = gr.Textbox(label="Info", lines=4)

	    # Language examples
	    gr.Markdown("### Example Texts by Language")
	    with gr.Row():
	        for lang_name, lang_info in LANGUAGES.items():
	            gr.Markdown(
	                f"{lang_name}\n\n{lang_info['example']}"
	            )

	    # Event handlers
	    # Fill the text box with the sample sentence of the selected language.
	    example_btn.click(
	        fn=get_example_text,
	        inputs=[language_dropdown],
	        outputs=[text_input],
	    )

	    # Run synthesis; the returned pair feeds the audio player and info box.
	    generate_btn.click(
	        fn=generate_speech,
	        inputs=[text_input, language_dropdown, reference_audio],
	        outputs=[audio_output, metadata_output],
	    )


	if __name__ == "__main__":
	    # Listen on all interfaces at port 7860; no public share link.
	    demo.launch(server_name="0.0.0.0", server_port=7860, share=False)