Spaces:
Runtime error
Runtime error
| import os | |
| import shutil | |
| import torch | |
| from TTS.api import TTS | |
| import gradio as gr | |
| from faster_whisper import WhisperModel | |
# Optional: install the local package via setup.py before the app starts.
# A failure here is reported and then ignored so the app can still launch.
import subprocess
import sys

try:
    # Run setup.py with the interpreter executing this script rather than
    # whatever "python" happens to resolve to on PATH, so the package is
    # installed into the environment the app actually imports from.
    subprocess.run(
        [sys.executable or "python", "setup.py", "install", "--user"],
        check=True,
    )
except (subprocess.CalledProcessError, OSError) as e:
    # CalledProcessError: setup.py exited with a non-zero status.
    # OSError: the interpreter itself could not be started.
    print(f"Installation failed with error: {e}")
else:
    print("Installation successful.")
# Device selection: use the GPU when CUDA is available, otherwise the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Load the XTTS v2 text-to-speech model, then move it onto the chosen device.
tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2")
tts = tts.to(device)

# Load the "small" Whisper model used to transcribe uploaded audio.
whisper_model = WhisperModel("small", device=device)

# Directory where uploaded audio clips are stored; created if missing.
dataset_folder = "my_voice_dataset/audio"
os.makedirs(dataset_folder, exist_ok=True)
# Transcription function
def transcribe_language(audio_path: str) -> str:
    """Transcribe an audio file and return the recognized text.

    Runs the module-level ``whisper_model`` on ``audio_path`` and joins the
    ``text`` of every returned segment with a single space.

    Args:
        audio_path: Path of the audio file to transcribe.

    Returns:
        The segment texts joined into one string.
    """
    spoken_segments, _info = whisper_model.transcribe(audio_path)
    return " ".join(piece.text for piece in spoken_segments)
# Language codes accepted by the XTTS v2 model. A detected language outside
# this set would make synthesis fail, so it is replaced by the fallback.
_XTTS_LANGUAGES = frozenset({
    "en", "es", "fr", "de", "it", "pt", "pl", "tr", "ru",
    "nl", "cs", "ar", "zh-cn", "hu", "ko", "ja", "hi",
})
_FALLBACK_LANGUAGE = "en"


# Voice cloning function
def voice_clone(text: str, speaker_wav: str) -> str:
    """Synthesize ``text`` using the uploaded clip as the reference voice.

    The uploaded file is copied into ``dataset_folder``, transcribed, and the
    language of that transcription is detected with ``langdetect``. The
    detected code is used for synthesis only when XTTS v2 supports it;
    otherwise (or when detection fails or the transcription is empty) the
    language falls back to ``"en"``.

    NOTE(review): the language is detected from the reference clip, not from
    ``text`` -- confirm this is the intended behavior.

    Args:
        text: The text to speak.
        speaker_wav: Path of the uploaded reference audio file.

    Returns:
        Path of the generated audio file (``"output.wav"``).

    Raises:
        ValueError: If no reference audio file was provided.
    """
    # Gradio passes None when nothing was uploaded; fail early with a clear
    # message instead of an opaque TypeError from os.path.basename.
    if not speaker_wav:
        raise ValueError("No reference audio file was provided.")

    # Save uploaded audio
    filename = os.path.basename(speaker_wav)
    saved_path = os.path.join(dataset_folder, filename)
    shutil.copy(speaker_wav, saved_path)
    print(f"Saved uploaded audio to: {saved_path}")

    # Transcribe audio
    transcription = transcribe_language(saved_path)
    print(f"Transcription: {transcription}")

    # Detect language automatically (fallback to 'en')
    language = _FALLBACK_LANGUAGE
    if transcription.strip():
        try:
            # Imported lazily so a missing langdetect install only disables
            # detection instead of breaking the whole app.
            from langdetect import detect
            detected = detect(transcription)
        except Exception as e:
            # Best effort: any detection problem keeps the fallback language.
            print(f"Language detection failed: {e}")
        else:
            print(f"Detected language: {detected}")
            candidate = str(detected).lower()
            if candidate in _XTTS_LANGUAGES:
                language = candidate
            else:
                print(
                    f"Language '{detected}' is not supported by XTTS; "
                    f"using '{_FALLBACK_LANGUAGE}' instead."
                )

    # Generate speech
    tts.tts_to_file(
        text=text,
        speaker_wav=saved_path,
        language=language,
        file_path="output.wav",
    )
    return "output.wav"
# Gradio interface
# Inputs: the text to synthesize and the reference voice recording.
_text_input = gr.Textbox(lines=2, placeholder="Enter the text...", label="Text")
_voice_input = gr.Audio(type="filepath", label="Upload audio file")

# Output: the synthesized speech, passed back as a file path.
_audio_output = gr.Audio(type="filepath", label="Generated audio file")

iface = gr.Interface(
    fn=voice_clone,
    theme="Nymbo/Nymbo_Theme",
    inputs=[_text_input, _voice_input],
    outputs=_audio_output,
    title="Voice Cloning with Automatic Language Detection",
)

# Start the web app.
iface.launch()