Spaces:
Sleeping
Sleeping
| import os | |
| import tempfile | |
| import zipfile | |
| from docx import Document | |
| from TTS.api import TTS | |
| from pydub import AudioSegment | |
| import gradio as gr | |
| # Available TTS models with voice descriptions | |
# Maps the label shown in the voice dropdown to the model identifier that is
# handed to the TTS constructor as ``model_name``.
VOICE_MODELS = {
    "Jenny (Expressive Female)": "tts_models/en/jenny/jenny",
    "LJSpeech (Standard Female)": "tts_models/en/ljspeech/vits",
    "VCTK (Multiple Speakers)": "tts_models/en/vctk/vits",
}
| # Function to update speaker choices based on the selected model | |
def update_speaker_choices(selected_voice):
    """Return the speaker labels to offer for *selected_voice*.

    The "VCTK (Multiple Speakers)" voice yields three numbered placeholder
    labels; every other voice yields a single default label.
    """
    multi_speaker = selected_voice == "VCTK (Multiple Speakers)"
    if not multi_speaker:
        return ["Default Speaker"]
    # Placeholder labels "Speaker 1" .. "Speaker 3"; replace with the model's
    # actual speaker names or indices when they are known.
    return [f"Speaker {number}" for number in range(1, 4)]
def _pick_speaker(speaker_name, speakers):
    """Translate a UI speaker label into one of the model's speaker names.

    A label that already is one of *speakers* is returned unchanged. A label of
    the form "Speaker N" selects the N-th entry (1-based). Any other label
    selects the first entry.

    Raises:
        ValueError: If the number in the label is outside the speaker list.
    """
    if speaker_name in speakers:
        return speaker_name

    index = 0
    if speaker_name and speaker_name.startswith("Speaker"):
        suffix = speaker_name.split()[-1]
        if suffix.isdigit():
            index = int(suffix) - 1

    if not 0 <= index < len(speakers):
        raise ValueError(
            f"{speaker_name!r} is not available; this voice has {len(speakers)} speaker(s)."
        )
    return speakers[index]


def docx_to_wav_zip(doc_file, selected_voice, speaker_name):
    """Synthesize the text of a .docx file and return the path of a zip holding the WAV.

    Args:
        doc_file: The uploaded document, either a filesystem path or an object
            exposing the path as ``.name``.
        selected_voice: A key of ``VOICE_MODELS`` naming the TTS model to use.
        speaker_name: Speaker label from the UI (e.g. ``"Speaker 2"``). Only
            used when the loaded model reports a list of speakers.

    Returns:
        Path of a zip archive containing the generated WAV file.

    Raises:
        ValueError: If no file was provided, the document has no text, or the
            requested speaker number is outside the model's speaker list.
        KeyError: If ``selected_voice`` is not a key of ``VOICE_MODELS``.
    """
    if doc_file is None:
        raise ValueError("Please upload a .docx file.")
    # Accept both a plain path and a file wrapper that carries the path in .name.
    doc_path = getattr(doc_file, "name", doc_file)

    # Extract the text first so an empty document is rejected before the
    # (much more expensive) model load.
    document = Document(doc_path)
    full_text = "\n".join(
        para.text for para in document.paragraphs if para.text.strip()
    )
    if not full_text:
        raise ValueError("The uploaded document contains no text to read.")

    tts = TTS(model_name=VOICE_MODELS[selected_voice], progress_bar=False, gpu=False)

    # The synthesizer selects a voice by speaker *name* via ``speaker=``; pass
    # it only when the model actually exposes speakers.
    synth_kwargs = {}
    speakers = getattr(tts, "speakers", None)
    if speakers:
        synth_kwargs["speaker"] = _pick_speaker(speaker_name, list(speakers))

    # Reserve a unique path for the WAV; the file is closed immediately so the
    # synthesizer can write to it by name.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_wav:
        wav_path = tmp_wav.name
    # Swap only the extension, leaving any ".wav" elsewhere in the path alone.
    zip_path = os.path.splitext(wav_path)[0] + ".zip"

    try:
        tts.tts_to_file(text=full_text, file_path=wav_path, **synth_kwargs)
        with zipfile.ZipFile(zip_path, "w") as zipf:
            zipf.write(wav_path, os.path.basename(wav_path))
    finally:
        # The WAV is only an intermediate; remove it whether or not synthesis
        # succeeded so requests do not accumulate temp files.
        os.remove(wav_path)

    return zip_path
| # Gradio interface | |
# Input widgets, created in the order in which the handler receives its arguments.
document_input = gr.File(label="Upload .docx File")
voice_input = gr.Dropdown(
    choices=list(VOICE_MODELS),
    label="Choose Voice",
    value="Jenny (Expressive Female)",
)
# The speaker choices are computed once, for the VCTK voice, when the module loads.
speaker_input = gr.Dropdown(
    choices=update_speaker_choices("VCTK (Multiple Speakers)"),
    label="Choose Speaker",
    value="Speaker 1",
)
zip_output = gr.File(label="Download Zip File")

interface = gr.Interface(
    fn=docx_to_wav_zip,
    inputs=[document_input, voice_input, speaker_input],
    outputs=zip_output,
    title="Realistic Voiceover from DOCX (Multiple Voices)",
    description="Upload a .docx file, choose a realistic voice, and pick a speaker to generate a voiceover in WAV format.",
)

# Start the web UI only when this file is run as a script.
if __name__ == "__main__":
    interface.launch()