Spaces:

SohomToom
/

DocToAudioConverted

Sleeping

App Files Files Community

DocToAudioConverted / app.py

SohomToom

Update app.py

68f40ec verified 10 months ago

raw

history blame

2.53 kB

	import os
	import tempfile
	import zipfile
	from docx import Document
	from TTS.api import TTS
	from pydub import AudioSegment
	import gradio as gr

	# Voice catalogue: the keys are the labels offered in the "Choose Voice"
	# dropdown, the values are the model names handed to TTS(model_name=...).
	VOICE_MODELS = {
	    "Jenny (Expressive Female)": "tts_models/en/jenny/jenny",
	    "LJSpeech (Standard Female)": "tts_models/en/ljspeech/vits",
	    "VCTK (Multiple Speakers)": "tts_models/en/vctk/vits",
	}

	def update_speaker_choices(selected_voice):
	    """Return the speaker labels to offer for the given voice label.

	    Only the "VCTK (Multiple Speakers)" voice gets a list of numbered
	    speakers; every other value yields a single default entry. A new list
	    is built on each call.
	    """
	    wants_multi_speaker = selected_voice == "VCTK (Multiple Speakers)"
	    if not wants_multi_speaker:
	        return ["Default Speaker"]
	    # Placeholder labels: replace with the model's actual speaker names
	    # or indices.
	    return ["Speaker 1", "Speaker 2", "Speaker 3"]

	def docx_to_wav_zip(doc_file, selected_voice, speaker_name):
	    """Read a .docx upload aloud and return the path of a zip with the WAV.

	    Args:
	        doc_file: The uploaded document, given either as a filesystem path
	            or as an object exposing that path as ``.name``.
	        selected_voice: A key of ``VOICE_MODELS``.
	        speaker_name: Dropdown label such as ``"Speaker 2"``. The trailing
	            number is read as a 1-based position in the loaded model's
	            speaker list. Labels not starting with ``"Speaker"`` (or
	            ``None``) select the first speaker. Ignored when the loaded
	            model is not multi-speaker.

	    Returns:
	        Path of a zip archive containing the generated WAV file.

	    Raises:
	        gr.Error: If no file was uploaded, the voice is unknown, the
	            document has no text, or the speaker label cannot be mapped
	            to a speaker of the model.
	    """
	    if doc_file is None:
	        raise gr.Error("Please upload a .docx file.")
	    if selected_voice not in VOICE_MODELS:
	        raise gr.Error(f"Unknown voice: {selected_voice!r}")

	    # The upload may arrive as a plain path or as a file wrapper with .name.
	    if isinstance(doc_file, (str, os.PathLike)):
	        doc_path = os.fspath(doc_file)
	    else:
	        doc_path = doc_file.name

	    # Extract the text first so an empty document fails before the
	    # (expensive) model load.
	    document = Document(doc_path)
	    full_text = "\n".join(
	        para.text for para in document.paragraphs if para.text.strip()
	    )
	    if not full_text:
	        raise gr.Error("The document contains no readable text.")

	    # Load the selected TTS model
	    tts = TTS(model_name=VOICE_MODELS[selected_voice], progress_bar=False, gpu=False)

	    # tts_to_file() selects a voice through `speaker` (a speaker name), and
	    # only multi-speaker models accept one, so translate the dropdown label
	    # into a name from the model's own speaker list.
	    synth_kwargs = {}
	    if tts.is_multi_speaker:
	        speaker_idx = 0
	        if speaker_name and speaker_name.startswith("Speaker"):
	            try:
	                speaker_idx = int(speaker_name.split()[-1]) - 1
	            except ValueError:
	                raise gr.Error(f"Invalid speaker: {speaker_name!r}") from None
	        available_speakers = tts.speakers
	        if not 0 <= speaker_idx < len(available_speakers):
	            raise gr.Error(f"Speaker not available for this voice: {speaker_name!r}")
	        synth_kwargs["speaker"] = available_speakers[speaker_idx]

	    # Reserve a temporary .wav path; the zip sits next to it with the same stem.
	    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_wav:
	        wav_path = tmp_wav.name
	    zip_path = os.path.splitext(wav_path)[0] + ".zip"

	    try:
	        tts.tts_to_file(text=full_text, file_path=wav_path, **synth_kwargs)

	        # Round-trip through pydub; the output stays WAV (no MP3 conversion).
	        sound = AudioSegment.from_wav(wav_path)
	        sound.export(wav_path, format="wav")

	        # Zip the audio under its bare file name.
	        with zipfile.ZipFile(zip_path, "w") as zipf:
	            zipf.write(wav_path, os.path.basename(wav_path))
	    finally:
	        # The temp file was created with delete=False, so remove it here;
	        # only the zip is handed back to the caller.
	        if os.path.exists(wav_path):
	            os.remove(wav_path)

	    return zip_path

	# Gradio interface: upload a document, pick a voice and a speaker, and
	# download the zipped audio.
	interface = gr.Interface(
	    fn=docx_to_wav_zip,
	    inputs=[
	        # Restrict uploads to .docx so other file types are rejected in
	        # the UI instead of failing inside the document parser.
	        gr.File(label="Upload .docx File", file_types=[".docx"]),
	        gr.Dropdown(
	            choices=list(VOICE_MODELS.keys()),
	            label="Choose Voice",
	            value="Jenny (Expressive Female)",
	        ),
	        # NOTE: the speaker list is built once from the VCTK choices and is
	        # not refreshed when a different voice is selected.
	        gr.Dropdown(
	            choices=update_speaker_choices("VCTK (Multiple Speakers)"),
	            label="Choose Speaker",
	            value="Speaker 1",
	        ),
	    ],
	    outputs=gr.File(label="Download Zip File"),
	    title="Realistic Voiceover from DOCX (Multiple Voices)",
	    description="Upload a .docx file, choose a realistic voice, and pick a speaker to generate a voiceover in WAV format.",
	)

	# Start the web app only when this file is run as a script.
	if __name__ == "__main__":
	    interface.launch()