import os import time import torch import torchaudio import gradio as gr from TTS.api import TTS from TTS.tts.configs.xtts_config import XttsConfig from TTS.tts.models.xtts import XttsAudioConfig, Xtts, XttsArgs from TTS.config.shared_configs import BaseDatasetConfig, BaseAudioConfig from pathlib import Path from datetime import datetime from pydub import AudioSegment device = "cuda" if torch.cuda.is_available() else "cpu" OUTPUT_DIR = Path("outputs") OUTPUT_DIR.mkdir(exist_ok=True) HISTORY = [] tts = None # PyTorch 2.6 fix - allowlist all XTTS classes torch.serialization.add_safe_globals([ XttsConfig, XttsAudioConfig, BaseDatasetConfig, BaseAudioConfig, Xtts, XttsArgs, ]) def get_tts(): global tts if tts is None: os.environ["COQUI_TOS_AGREED"] = "1" tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device) return tts LANGUAGES = [ ("English", "en"), ("Hindi", "hi"), ("French", "fr"), ("German", "de"), ("Spanish", "es"), ("Italian", "it"), ("Portuguese", "pt"), ("Chinese", "zh"), ("Japanese", "ja"), ("Korean", "ko"), ("Arabic", "ar"), ("Turkish", "tr"), ("Russian", "ru"), ("Dutch", "nl"), ("Polish", "pl"), ("Czech", "cs"), ("Hungarian", "hu"), ] ALLOWED_FORMATS = [ ".wav", ".mp3", ".flac", ".ogg", ".m4a", ".aac", ".wma", ".opus", ".mpeg", ".mp4" ] def convert_to_wav(audio_path): ext = Path(audio_path).suffix.lower() if ext == ".wav": return audio_path try: audio = AudioSegment.from_file(audio_path) wav_path = str(Path(audio_path).with_suffix(".wav")) audio.export(wav_path, format="wav") return wav_path except Exception as e: raise Exception(f"Could not convert audio to WAV: {str(e)}") def validate_audio(audio_path): if audio_path is None: return False, "Please upload a voice sample." ext = Path(audio_path).suffix.lower() if ext not in ALLOWED_FORMATS: return False, f"Unsupported format '{ext}'. Allowed: WAV, MP3, FLAC, OGG, M4A, AAC, WMA, OPUS, MPEG, MP4" # Try torchaudio first try: waveform, sample_rate = torchaudio.load(audio_path) duration = waveform.shape[1] / sample_rate if duration < 3: return False, f"Audio too short ({duration:.1f}s). Minimum 3 seconds." if duration > 30: return False, f"Audio too long ({duration:.1f}s). Maximum 30 seconds." return True, f"Audio valid | Format: {ext.upper()} | Duration: {duration:.1f}s" except Exception: pass # Fallback to pydub try: audio = AudioSegment.from_file(audio_path) duration = len(audio) / 1000 if duration < 3: return False, f"Audio too short ({duration:.1f}s). Minimum 3 seconds." if duration > 30: return False, f"Audio too long ({duration:.1f}s). Maximum 30 seconds." return True, f"Audio valid | Format: {ext.upper()} | Duration: {duration:.1f}s" except Exception as e: return False, f"Could not read audio file: {str(e)}" def validate_text(text): if not text or not text.strip(): return False, "Please enter some text." if len(text.strip()) < 5: return False, "Text too short." if len(text.strip()) > 1000: return False, "Text too long." return True, "OK" def get_history_html(): if not HISTORY: return "
No clones yet.
" rows = "" for i, h in enumerate(reversed(HISTORY[-10:]), 1): rows += ( f"| # | " f"Time | " f"Text | " f"Language | " f"Duration | " f"Generated in | " f"
|---|
No clones yet.
") clear_history_btn = gr.Button("Clear History", variant="stop") with gr.Tab("System Info"): system_info = gr.Textbox( value=get_system_info(), interactive=False, lines=6, label="System" ) refresh_btn = gr.Button("Refresh", variant="secondary") text_input.change(fn=count_chars, inputs=text_input, outputs=char_display) audio_input.change(fn=audio_info, inputs=audio_input, outputs=audio_status) clone_btn.click( fn=clone_voice, inputs=[text_input, audio_input, language, speed], outputs=[audio_output, status_msg, history_display] ) clear_btn.click( fn=clear_all, inputs=[], outputs=[text_input, audio_input, language, speed, audio_output, status_msg, history_display] ) clear_history_btn.click(fn=clear_history, inputs=[], outputs=[history_display]) refresh_btn.click(fn=get_system_info, inputs=[], outputs=[system_info]) demo.launch( server_name="0.0.0.0", server_port=7860, theme=gr.themes.Soft(primary_hue="indigo", secondary_hue="purple") )