Voice-Clone-Multilingual

Runtime error

App Files Community

Voice-Clone-Multilingual / app.py

drixo

Update app.py

c54fd34 verified about 1 month ago

raw

history blame contribute delete

2.28 kB

	import os
	import shutil
	import torch
	from TTS.api import TTS
	import gradio as gr
	from faster_whisper import WhisperModel

	# Optional: run local setup.py if needed
	import subprocess
	import sys


	def run_local_setup(setup_path: str = "setup.py") -> bool:
	    """Install the local package described by ``setup_path``, best effort.

	    The install is attempted with the interpreter that is running this
	    script, so the package lands in the environment the app actually uses.
	    Any failure is reported on stdout and never raised, because the setup
	    step is optional.

	    Args:
	        setup_path: Path of the setup script to run.

	    Returns:
	        True when the install command completed successfully, False when
	        the script is missing or the command failed.
	    """
	    if not os.path.isfile(setup_path):
	        # Nothing to install; avoid spawning a process that can only fail.
	        print(f"No {setup_path} found; skipping installation.")
	        return False
	    try:
	        subprocess.run(
	            [sys.executable, setup_path, "install", "--user"],
	            check=True,
	        )
	    except (subprocess.CalledProcessError, OSError) as e:
	        # OSError covers an interpreter that cannot be launched at all.
	        print(f"Installation failed with error: {e}")
	        return False
	    print("Installation successful.")
	    return True


	run_local_setup()

	# Device selection: use CUDA when torch reports it as available, else the CPU
	if torch.cuda.is_available():
	    device = "cuda"
	else:
	    device = "cpu"

	# Initialize TTS with the multilingual XTTS v2 model, then move it to the device
	tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2")
	tts = tts.to(device)

	# Initialize the "small" Whisper model used to transcribe uploaded audio
	whisper_model = WhisperModel("small", device=device)

	# Folder where uploaded speaker recordings are stored; created on startup
	dataset_folder = "my_voice_dataset/audio"
	os.makedirs(dataset_folder, exist_ok=True)

	# Transcription function
	def transcribe_language(audio_path: str) -> str:
	    """Transcribe the audio file at ``audio_path`` with the Whisper model.

	    Args:
	        audio_path: Path of the audio file to transcribe.

	    Returns:
	        The text of every returned segment, joined with single spaces.
	    """
	    # The second element of the result is not needed here.
	    segment_iter, _info = whisper_model.transcribe(audio_path)
	    segment_texts = (segment.text for segment in segment_iter)
	    return " ".join(segment_texts)

	# Language codes accepted by the XTTS v2 model, taken from the Coqui XTTS
	# documentation. NOTE(review): confirm against the installed TTS version.
	_XTTS_LANGUAGES = frozenset({
	    "en", "es", "fr", "de", "it", "pt", "pl", "tr", "ru",
	    "nl", "cs", "ar", "zh-cn", "hu", "ko", "ja", "hi",
	})
	# Detector spellings mapped onto the only Chinese code listed above.
	_LANGUAGE_ALIASES = {"zh-tw": "zh-cn", "zh": "zh-cn"}
	_DEFAULT_LANGUAGE = "en"


	def to_xtts_language(code: str) -> str:
	    """Map a detected language code onto one the synthesizer accepts.

	    Args:
	        code: Language code as produced by the language detector.

	    Returns:
	        The normalized code when it is in the supported set, otherwise
	        the default language ``"en"``.
	    """
	    normalized = code.strip().lower()
	    normalized = _LANGUAGE_ALIASES.get(normalized, normalized)
	    if normalized in _XTTS_LANGUAGES:
	        return normalized
	    return _DEFAULT_LANGUAGE


	def detect_language(transcription: str) -> str:
	    """Detect the language of ``transcription``, falling back to ``"en"``.

	    Detection is best effort: a blank transcription, a missing
	    ``langdetect`` package, a detection error or an unsupported language
	    all yield the default language instead of raising.

	    Args:
	        transcription: Text transcribed from the speaker recording.

	    Returns:
	        A language code from the supported set.
	    """
	    if not transcription.strip():
	        return _DEFAULT_LANGUAGE
	    try:
	        from langdetect import detect
	        detected = detect(transcription)
	    except Exception as e:
	        # Deliberately broad: detection must never block synthesis.
	        print(f"Language detection failed: {e}")
	        return _DEFAULT_LANGUAGE
	    print(f"Detected language: {detected}")
	    language = to_xtts_language(detected)
	    if language != detected:
	        print(f"Using language '{language}' for synthesis.")
	    return language


	# Voice cloning function
	def voice_clone(text: str, speaker_wav: str):
	    """Synthesize ``text`` in the voice of the uploaded recording.

	    The recording is copied into the dataset folder, transcribed, and the
	    transcription's language is used as the synthesis language.

	    Args:
	        text: Text to speak.
	        speaker_wav: Path of the uploaded reference recording.

	    Returns:
	        Path of the generated audio file (``"output.wav"``).

	    Raises:
	        gr.Error: If the text is blank or no recording was uploaded.
	    """
	    # Fail early with a readable message instead of an opaque error later.
	    if not text or not text.strip():
	        raise gr.Error("Please enter the text to synthesize.")
	    if not speaker_wav:
	        raise gr.Error("Please upload a reference audio file.")

	    # Save uploaded audio
	    filename = os.path.basename(speaker_wav)
	    saved_path = os.path.join(dataset_folder, filename)
	    shutil.copy(speaker_wav, saved_path)
	    print(f"Saved uploaded audio to: {saved_path}")

	    # Transcribe audio
	    transcription = transcribe_language(saved_path)
	    print(f"Transcription: {transcription}")

	    # Detect language automatically (fallback to 'en')
	    language = detect_language(transcription)

	    # Generate speech
	    tts.tts_to_file(
	        text=text,
	        speaker_wav=saved_path,
	        language=language,
	        file_path="output.wav",
	    )
	    return "output.wav"

	# Gradio interface: a text box and a reference recording in, generated audio out
	text_input = gr.Textbox(lines=2, placeholder="Enter the text...", label="Text")
	speaker_input = gr.Audio(type="filepath", label="Upload audio file")
	generated_output = gr.Audio(type="filepath", label="Generated audio file")

	iface = gr.Interface(
	    fn=voice_clone,
	    theme="Nymbo/Nymbo_Theme",
	    inputs=[text_input, speaker_input],
	    outputs=generated_output,
	    title="Voice Cloning with Automatic Language Detection",
	)

	# Start the web app
	iface.launch()