# drixo's picture
# Update app.py
# c54fd34 verified
import os
import shutil
import torch
from TTS.api import TTS
import gradio as gr
from faster_whisper import WhisperModel
# Optional: run local setup.py if needed
import subprocess
import sys

try:
    # Launch setup.py with the interpreter that is running this script rather
    # than whatever "python" happens to resolve to on PATH, so the install
    # lands in the same environment this app imports from. Fall back to
    # "python" only if the interpreter path is unavailable.
    subprocess.run(
        [sys.executable or "python", "setup.py", "install", "--user"],
        check=True,
    )
    print("Installation successful.")
except (subprocess.CalledProcessError, OSError) as e:
    # CalledProcessError: setup.py exited with a non-zero status.
    # OSError: the interpreter itself could not be started.
    # The install is best-effort, so report the failure and keep going.
    print(f"Installation failed with error: {e}")
# Device selection: use the GPU whenever torch reports CUDA as available.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Initialize TTS: load the XTTS v2 model, then move it to the chosen device.
tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2")
tts = tts.to(device)

# Initialize Whisper model for transcription on the same device.
whisper_model = WhisperModel("small", device=device)

# Folder for dataset: create it up front so later copies cannot fail on a
# missing directory.
dataset_folder = "my_voice_dataset/audio"
os.makedirs(dataset_folder, exist_ok=True)
# Transcription function
def transcribe_language(audio_path: str) -> str:
    """Transcribe an audio file and return its text as one string.

    Args:
        audio_path: Path of the audio file passed to the module-level
            Whisper model.

    Returns:
        The text of every transcribed segment, joined with single spaces.
    """
    # The second element of the result is not needed here.
    pieces, _info = whisper_model.transcribe(audio_path)
    return " ".join(piece.text for piece in pieces)
# Voice cloning function
# Language codes accepted by the XTTS v2 model, per its published model card.
# NOTE(review): confirm this list against the installed TTS version.
_XTTS_LANGUAGES = frozenset({
    "en", "es", "fr", "de", "it", "pt", "pl", "tr", "ru", "nl",
    "cs", "ar", "zh-cn", "ja", "hu", "ko", "hi",
})

# langdetect codes that differ from the code XTTS uses for the same language.
# NOTE(review): mapping Traditional Chinese onto "zh-cn" is a best guess.
_LANGDETECT_ALIASES = {"zh-tw": "zh-cn"}


def _detect_language(transcription: str, default: str = "en") -> str:
    """Return an XTTS-compatible language code for *transcription*, else *default*."""
    if not transcription.strip():
        return default
    try:
        from langdetect import detect

        detected = detect(transcription)
    except Exception as e:
        # Deliberately broad: detection is best-effort, and this also covers
        # langdetect not being installed.
        print(f"Language detection failed: {e}")
        return default
    print(f"Detected language: {detected}")

    language = _LANGDETECT_ALIASES.get(detected, detected)
    if language not in _XTTS_LANGUAGES:
        # Passing an unknown code on would make synthesis fail, so honour the
        # promised fallback instead.
        print(f"Language '{detected}' is not supported; falling back to '{default}'.")
        return default
    return language


def voice_clone(text: str, speaker_wav: str):
    """Synthesize *text* in the voice of an uploaded reference recording.

    The reference audio is copied into ``dataset_folder``, transcribed, and
    the transcription is used to pick the synthesis language (falling back
    to ``"en"``).

    Args:
        text: The text to speak.
        speaker_wav: Path to the uploaded reference audio file.

    Returns:
        The path of the generated audio file (``"output.wav"``).

    Raises:
        gr.Error: If *text* is empty or no reference audio was provided.
    """
    if not text or not text.strip():
        raise gr.Error("Please enter the text to synthesize.")
    # Without this guard a missing upload fails inside os.path.basename with
    # an opaque TypeError.
    if not speaker_wav:
        raise gr.Error("Please upload a reference audio file.")

    # Save uploaded audio
    filename = os.path.basename(speaker_wav)
    saved_path = os.path.join(dataset_folder, filename)
    shutil.copy(speaker_wav, saved_path)
    print(f"Saved uploaded audio to: {saved_path}")

    # Transcribe audio
    transcription = transcribe_language(saved_path)
    print(f"Transcription: {transcription}")

    # Detect language automatically (fallback to 'en')
    language = _detect_language(transcription)

    # Generate speech
    # NOTE(review): the output path is fixed, so overlapping requests would
    # overwrite each other's result.
    tts.tts_to_file(text=text, speaker_wav=saved_path, language=language, file_path="output.wav")
    return "output.wav"
# Gradio interface: a text box and an audio upload in, one audio file out.
text_input = gr.Textbox(lines=2, placeholder="Enter the text...", label="Text")
reference_audio_input = gr.Audio(type="filepath", label="Upload audio file")
generated_audio_output = gr.Audio(type="filepath", label="Generated audio file")

iface = gr.Interface(
    fn=voice_clone,
    theme="Nymbo/Nymbo_Theme",
    inputs=[text_input, reference_audio_input],
    outputs=generated_audio_output,
    title="Voice Cloning with Automatic Language Detection",
)

# Start serving the app.
iface.launch()