Spaces:
Sleeping
Sleeping
"""
ChatterBox Desi TTS Space

Fine-tuned multi-language Indic TTS model for text-to-speech synthesis.
Supports: Bengali, Hindi, Marathi, Gujarati, Tamil, Telugu.
"""
| import sys | |
| import os | |
| import gradio as gr | |
| import torch | |
| from huggingface_hub import snapshot_download | |
| import torchaudio | |
# Put the "chatterbox-finetuning" directory that sits next to this file at the
# front of sys.path, so the import below is looked up there first.
chatterbox_finetuning_path = os.path.join(
    os.path.dirname(__file__),
    "chatterbox-finetuning",
)
sys.path.insert(0, chatterbox_finetuning_path)

# Must come after the sys.path tweak above (hence not grouped with the other
# imports at the top of the file).
from src.chatterbox_.tts import ChatterboxTTS  # noqa: E402
# Model configuration
# Hugging Face Hub repository id that the model files are downloaded from.
MODEL_ID = "BosonLab/chatterbox-desi"

# Run on the GPU when torch reports CUDA as available, otherwise on the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"
# Language configuration
# One row per supported language: (dropdown label, language code, sample text).
_LANGUAGE_ROWS = (
    (
        "Bengali (বাংলা)",
        "bn",
        "আমি বাংলায় কথা বলতে পারি। এটি একটি পরীক্ষামূলক বাক্য।",
    ),
    (
        "Hindi (हिंदी)",
        "hi",
        "मैं हिंदी में बोल सकता हूँ। यह एक परीक्षण वाक्य है।",
    ),
    (
        "Marathi (मराठी)",
        "mr",
        "मी मराठीत बोलू शकतो. हे एक चाचणी वाक्य आहे.",
    ),
    (
        "Gujarati (ગુજરાતી)",
        "gu",
        "હું ગુજરાતીમાં બોલી શકું છું. આ એક પ્રાયોગિક વાક્ય છે.",
    ),
    (
        "Tamil (தமிழ்)",
        "ta",
        "நான் தமிழில் பேச முடியும். இது ஒரு சோதனை வாக்கியம்.",
    ),
    (
        "Telugu (తెలుగు)",
        "te",
        "నేను తెలుగులో మాట్లాడగలను. ఇది ఒક પરીક્ષ వాక్యం.",
    ),
)

# Maps each dropdown label to its "code", its "[code]" text tag and its
# "example" sentence. Row order above is the order shown in the UI.
LANGUAGES = {
    label: {"code": code, "tag": f"[{code}]", "example": sample}
    for label, code, sample in _LANGUAGE_ROWS
}
# Load model
# This runs at import time: the snapshot is fetched (or reused from the local
# Hub cache) and the model is built once, before the UI is defined.
print(f"Loading model from {MODEL_ID}...")
print(f"Using device: {DEVICE}")

try:
    model_dir = snapshot_download(MODEL_ID)
    print(f"Model downloaded to: {model_dir}")

    model = ChatterboxTTS.from_local(model_dir, device=DEVICE)
    print("Model loaded successfully!")
except Exception as exc:
    # Log the reason, then re-raise so start-up fails instead of serving a UI
    # that has no model behind it.
    print(f"Error loading model: {exc}")
    raise
def get_example_text(language):
    """Look up the sample sentence for the given language label.

    Labels that are not keys of ``LANGUAGES`` fall back to the Bengali entry.
    """
    fallback_entry = LANGUAGES["Bengali (বাংলা)"]
    entry = LANGUAGES.get(language, fallback_entry)
    return entry["example"]
def generate_speech(text, language, reference_audio=None):
    """
    Generate speech from text using the fine-tuned model.

    Args:
        text: Input text to convert to speech. Text that already starts with
            "[" is assumed to carry its own tag and is passed through as is;
            otherwise the selected language's tag is prepended.
        language: Selected language name (a key of ``LANGUAGES``). Unknown
            names fall back to the Bengali entry.
        reference_audio: Optional reference audio path for voice cloning.
            ``None`` or an empty value means "no reference".

    Returns:
        Tuple of (audio_data, metadata_str). On success ``audio_data`` is
        ``(sample_rate, samples)`` with ``samples`` as a NumPy array. On any
        failure, including blank input, ``audio_data`` is ``None`` and
        ``metadata_str`` is an ``"Error: ..."`` message.
    """
    # Reject missing or blank input up front, so the model is never asked to
    # synthesize a bare language tag and None does not surface as an
    # AttributeError message.
    cleaned = text.strip() if isinstance(text, str) else ""
    if not cleaned:
        return None, "Error: Please enter some text to synthesize."

    try:
        lang_info = LANGUAGES.get(language, LANGUAGES["Bengali (বাংলা)"])
        lang_tag = lang_info["tag"]

        # Prepend language tag if not already present
        if cleaned.startswith("["):
            tagged_text = cleaned
        else:
            tagged_text = f"{lang_tag} {cleaned}"

        # Generate speech with optional voice cloning. An empty path is
        # treated the same as no reference at all.
        generate_kwargs = {}
        if reference_audio:
            generate_kwargs["audio_prompt_path"] = reference_audio
        wav = model.generate(tagged_text, **generate_kwargs)

        metadata = (
            f"Language: {language}\n"
            f"Tag: {lang_tag}\n"
            f"Input: {text[:100]}{'...' if len(text) > 100 else ''}\n"
            f"Characters: {len(text)}"
        )

        # Tensor.numpy() only works on CPU tensors that do not require grad,
        # so detach and move to host memory before converting.
        samples = wav.squeeze(0).detach().cpu().numpy()
        return (model.sr, samples), metadata
    except Exception as e:
        # UI boundary: report the failure in the info box instead of crashing
        # the request handler.
        print(f"Error generating speech: {e}")
        return None, f"Error: {str(e)}"
# Create Gradio interface
with gr.Blocks(title="ChatterBox Desi TTS", theme=gr.themes.Soft()) as demo:
    # Page header.
    gr.Markdown(
        """
        # ChatterBox Desi TTS
        Multi-language Indic text-to-speech powered by [BosonLab/chatterbox-desi](https://huggingface.co/BosonLab/chatterbox-desi).
        Supports **Bengali, Hindi, Marathi, Gujarati, Tamil, and Telugu**.
        > **Note**: The model automatically adds the language tag. Just select your language and type your text.
        """
    )

    with gr.Row():
        # Left column: everything the user provides.
        with gr.Column(scale=1):
            language_dropdown = gr.Dropdown(
                label="Language",
                choices=list(LANGUAGES),
                value="Bengali (বাংলা)",
            )
            text_input = gr.Textbox(
                label="Text",
                placeholder="Enter text in your selected language...",
                lines=5,
                max_lines=10,
            )
            example_btn = gr.Button("Load Example Text", variant="secondary")
            # type="filepath" hands the handler a path string, which is what
            # generate_speech forwards as the reference audio.
            reference_audio = gr.Audio(
                label="Reference Audio (Optional — for voice cloning)",
                type="filepath",
            )
            generate_btn = gr.Button("Generate Speech", variant="primary")

        # Right column: synthesis result and the info/error text.
        with gr.Column(scale=1):
            audio_output = gr.Audio(label="Generated Speech")
            metadata_output = gr.Textbox(label="Info", lines=4)

    # Language examples
    gr.Markdown("### Example Texts by Language")
    with gr.Row():
        for lang_name, lang_info in LANGUAGES.items():
            gr.Markdown(f"**{lang_name}**\n\n{lang_info['example']}")

    # Event handlers
    # "Load Example Text" fills the text box with the selected language's sample.
    example_btn.click(
        fn=get_example_text,
        inputs=[language_dropdown],
        outputs=[text_input],
    )
    # "Generate Speech" runs synthesis and fills both output widgets.
    generate_btn.click(
        fn=generate_speech,
        inputs=[text_input, language_dropdown, reference_audio],
        outputs=[audio_output, metadata_output],
    )
if __name__ == "__main__":
    # Listen on every interface at port 7860, without a public share link.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
    }
    demo.launch(**launch_options)