Spaces:

jafrilalam
/

Bangla_TTS

Sleeping

App Files Files Community

Bangla_TTS / app.py

jafrilalam

Update app.py

f34f902 verified 10 months ago

raw

history blame contribute delete

9.21 kB

	import gradio as gr
	import edge_tts
	import asyncio
	import tempfile
	import os
	import time
	import fitz
	import ebooklib
	from ebooklib import epub
	from bs4 import BeautifulSoup

	async def get_voices():
	    """Return the selectable Bengali voices keyed by their display label.

	    Every label has the form ``"<ShortName> - <Locale> (<Gender>)"`` and maps
	    to the voice's short name.
	    """
	    catalog = (
	        ("bn-IN-TanishaaNeural", "bn-IN", "Female"),
	        ("bn-IN-BashkarNeural", "bn-IN", "Male"),
	        ("bn-BD-PradeepNeural", "bn-BD", "Male"),
	        ("bn-BD-NabanitaNeural", "bn-BD", "Female"),
	    )
	    labelled = {}
	    for short_name, locale, gender in catalog:
	        labelled[f"{short_name} - {locale} ({gender})"] = short_name
	    return labelled

	def extract_text_from_file(file):
	    """Extract plain text from an uploaded PDF, EPUB or TXT file.

	    Args:
	        file: The uploaded file, either an object exposing its path as
	            ``.name`` or the path itself as a string. ``None`` means that
	            nothing was uploaded.

	    Returns:
	        A ``(text, error)`` tuple. On success ``text`` is the stripped text
	        and ``error`` is ``None``; on failure ``text`` is ``None`` and
	        ``error`` is a user-facing message.
	    """
	    if file is None:
	        return None, "No file uploaded!"

	    # Accept both wrappers that carry the path in ``.name`` and bare paths.
	    file_path = getattr(file, "name", file)
	    file_ext = os.path.splitext(file_path)[1].lower()

	    try:
	        if file_ext == ".pdf":
	            # The context manager closes the document even if a page fails.
	            with fitz.open(file_path) as doc:
	                text = "".join(page.get_text("text") for page in doc)
	        elif file_ext == ".epub":
	            book = epub.read_epub(file_path)
	            fragments = []
	            for item in book.get_items_of_type(ebooklib.ITEM_DOCUMENT):
	                content = item.get_content().decode("utf-8")
	                soup = BeautifulSoup(content, "lxml")
	                # ``string=True`` replaces the deprecated ``text=True`` keyword.
	                for element in soup.find_all(string=True):
	                    stripped = element.strip()
	                    if stripped:
	                        fragments.append(stripped)
	            text = "\n".join(fragments)
	        elif file_ext == ".txt":
	            # "utf-8-sig" reads plain UTF-8 and also drops a leading BOM,
	            # which would otherwise end up in the extracted text.
	            with open(file_path, "r", encoding="utf-8-sig") as f:
	                text = f.read()
	        else:
	            return None, "Unsupported file format"
	    except Exception as e:
	        # UI boundary: report any parser/IO failure as a message, not a crash.
	        return None, f"Error processing file: {str(e)}"

	    return text.strip(), None

	async def text_to_speech(text, voice, rate, pitch):
	    """Synthesize ``text`` into an MP3 file using edge-tts.

	    Args:
	        text: The text to speak. ``None``, empty or whitespace-only text is
	            rejected with a warning.
	        voice: The selected voice label; everything before the first
	            ``" - "`` is used as the voice short name.
	        rate: Speech-rate offset, formatted as a signed percentage.
	        pitch: Pitch offset, formatted as a signed value in Hz.

	    Returns:
	        ``(path, None)`` with the path of the generated MP3 on success, or
	        ``(None, message)`` when an input is missing.
	    """
	    if not text or not text.strip():
	        return None, "Enter text to convert"
	    if not voice:
	        return None, "Select a voice and accent"

	    voice_short_name = voice.split(" - ")[0]
	    # The "d" format code rejects floats, so coerce in case the UI sends
	    # the slider values as floats rather than ints.
	    rate_str = f"{int(rate):+d}%"
	    pitch_str = f"{int(pitch):+d}Hz"
	    communicate = edge_tts.Communicate(text, voice_short_name, rate=rate_str, pitch=pitch_str)

	    timestamp = time.strftime("%Y%m%d_%H%M%S")
	    # delete=False: the file must outlive this function so the caller can
	    # use the returned path.
	    with tempfile.NamedTemporaryFile(delete=False, suffix=f"_tts_{timestamp}.mp3") as tmp_file:
	        tmp_path = tmp_file.name

	    try:
	        await communicate.save(tmp_path)
	    except BaseException:
	        # Do not leave an orphaned temp file behind when synthesis fails
	        # or the request is cancelled; the error itself still propagates.
	        try:
	            os.remove(tmp_path)
	        except OSError:
	            pass
	        raise

	    return tmp_path, None

	async def tts_interface(text, voice, rate, pitch):
	    """Gradio handler for the generate button.

	    Delegates to ``text_to_speech`` and returns three values: the audio
	    path, the same path again as a second output, and ``None`` as the
	    third. When ``text_to_speech`` reports a warning, the warning is
	    raised through ``gr.Warning`` and the second output is ``None``.
	    """
	    audio, warning = await text_to_speech(text, voice, rate, pitch)
	    if warning:
	        # gr.Warning is called for its side effect (presumably a UI alert;
	        # see the Gradio docs); it is not a value to hand to an output
	        # component, so call it on its own and return None explicitly.
	        gr.Warning(warning)
	        return audio, None, None
	    return audio, audio, None

	async def handle_file_upload(file):
	    """Gradio handler that extracts the text of an uploaded file.

	    Returns two values: the extracted text (or ``None`` when extraction
	    reported a warning) and ``None``. A warning from extraction is raised
	    through ``gr.Warning``.
	    """
	    # extract_text_from_file does blocking file parsing; run it in the
	    # default executor so this coroutine does not stall the event loop.
	    loop = asyncio.get_running_loop()
	    text, warning = await loop.run_in_executor(None, extract_text_from_file, file)
	    if warning:
	        # gr.Warning is called for its side effect, not as an output value.
	        gr.Warning(warning)
	        return None, None
	    return text, None

	def reset_fields():
	    """Return the eight cleared values used to reset the form."""
	    cleared = [
	        None,
	        "",
	        "",
	        0,
	        0,
	        None,
	        None,
	        "",
	    ]
	    return tuple(cleared)

	async def create_demo():
	    """Build the Gradio Blocks UI for the text-to-speech app.

	    Fetches the voice map from ``get_voices``, declares the input, settings
	    and output components, wires the three event handlers and returns the
	    ``gr.Blocks`` instance without launching it.
	    """
	    voices = await get_voices()

	    # Custom stylesheet passed to gr.Blocks below.
	    # NOTE(review): the ``.gr-*`` selectors presumably target Gradio's
	    # component classes; verify they match the installed Gradio version.
	    css = """
	body {
	font-family: 'Poppins', sans-serif;
	background: linear-gradient(135deg, #ff7e5f, #feb47b); /* Coral to peach gradient */
	color: #ffffff;
	}
	.gr-button-primary {
	background: linear-gradient(45deg, #ff6f61, #ff9f43) !important;
	border: none !important;
	color: white !important;
	padding: 14px 28px !important;
	border-radius: 12px !important;
	font-weight: 600 !important;
	transition: all 0.3s ease !important;
	box-shadow: 0 6px 20px rgba(0, 0, 0, 0.3) !important;
	}
	.gr-button-primary:hover {
	background: linear-gradient(45deg, #ff5733, #ff8c00) !important;
	transform: translateY(-2px) !important;
	box-shadow: 0 8px 24px rgba(0, 0, 0, 0.4) !important;
	}
	.gr-button-secondary {
	background: linear-gradient(45deg, #6b7280, #9ca3af) !important;
	border: none !important;
	color: white !important;
	padding: 14px 28px !important;
	border-radius: 12px !important;
	font-weight: 600 !important;
	transition: all 0.3s ease !important;
	box-shadow: 0 6px 20px rgba(0, 0, 0, 0.3) !important;
	}
	.gr-button-secondary:hover {
	background: linear-gradient(45deg, #4b5563, #7c8a9e) !important;
	transform: translateY(-2px) !important;
	box-shadow: 0 8px 24px rgba(0, 0, 0, 0.4) !important;
	}
	.gr-textbox, .gr-dropdown, .gr-slider, .gr-file {
	border-radius: 12px !important;
	border: 1px solid #4b5e8e !important;
	background: rgba(255, 255, 255, 0.1) !important;
	color: #ffffff !important;
	box-shadow: 0 4px 10px rgba(0, 0, 0, 0.2) !important;
	transition: all 0.3s ease !important;
	}
	.gr-textbox::placeholder {
	color: #a0a9c7 !important;
	}
	.gr-group {
	background: rgba(255, 255, 255, 0.15) !important;
	border-radius: 16px !important;
	box-shadow: 0 8px 32px rgba(0, 0, 0, 0.3) !important;
	padding: 24px !important;
	margin-bottom: 24px !important;
	backdrop-filter: blur(10px) !important;
	}
	.gr-markdown h1, .gr-markdown h2, .gr-markdown h3 {
	color: #ffd700 !important;
	text-shadow: 0 2px 4px rgba(0, 0, 0, 0.3) !important;
	}
	.gr-audio, .gr-file {
	border-radius: 12px !important;
	background: rgba(255, 255, 255, 0.1) !important;
	padding: 12px !important;
	box-shadow: 0 4px 10px rgba(0, 0, 0, 0.2) !important;
	}
	.container {
	max-width: 900px !important;
	margin: auto !important;
	padding: 20px !important;
	}
	.gr-slider .slider {
	background: #4b5e8e !important;
	}
	.gr-slider .slider::-webkit-slider-thumb {
	background: #ff6f61 !important;
	border-radius: 50% !important;
	}
	"""

	    with gr.Blocks(css=css, analytics_enabled=False) as demo:
	        # Page header.
	        gr.Markdown(
	            """
	# 🎙️ Text-to-Speech Converter
	A modern tool to convert text or files (PDF, EPUB, TXT) into high-quality audio.
	""",
	            elem_classes=["container"]
	        )

	        # NOTE(review): the nesting below was reconstructed from a copy of
	        # this file that had lost its indentation; confirm which components
	        # belong inside the Group against the intended layout.
	        with gr.Group():
	            with gr.Row():
	                # Left column: file upload and free-text input.
	                with gr.Column(scale=2):
	                    gr.Markdown("### Input Section")
	                    file_input = gr.File(
	                        label="Upload File (PDF, EPUB, TXT)",
	                        file_types=[".pdf", ".epub", ".txt"],
	                        file_count="single"
	                    )
	                    text_input = gr.Textbox(
	                        label="Text Input",
	                        lines=5,
	                        placeholder="Enter text or upload a file",
	                        show_copy_button=True
	                    )
	                # Right column: voice selection plus rate and pitch sliders.
	                with gr.Column(scale=1):
	                    gr.Markdown("### Settings")
	                    # The leading "" choice is the initial "nothing selected" value.
	                    voice_dropdown = gr.Dropdown(
	                        choices=[""] + list(voices.keys()),
	                        label="Voice & Accent",
	                        value="",
	                        info="Select voice and accent"
	                    )
	                    rate_slider = gr.Slider(
	                        minimum=-50,
	                        maximum=50,
	                        value=0,
	                        label="Speech Rate (%)",
	                        step=1,
	                        info="Adjust speed: 0% default, +50% faster, -50% slower."
	                    )
	                    pitch_slider = gr.Slider(
	                        minimum=-20,
	                        maximum=20,
	                        value=0,
	                        label="Pitch (Hz)",
	                        step=1,
	                        info="Adjust pitch: 0 Hz default, +20 Hz higher, -20 Hz lower."
	                    )

	            with gr.Row():
	                generate_btn = gr.Button("Generate Audio", variant="primary")
	                reset_btn = gr.Button("Reset", variant="secondary")

	            gr.Markdown("### Output Section")
	            audio_output = gr.Audio(label="Generated Audio", type="filepath")
	            download_output = gr.File(label="Download Audio File")
	            # Hidden component that only serves as an output slot for the handlers.
	            warning_md = gr.Markdown(label="Warning", visible=False)

	        # Event handlers
	        # Uploading (or clearing) a file fills the text box with its text.
	        file_input.change(
	            fn=handle_file_upload,
	            inputs=[file_input],
	            outputs=[text_input, warning_md],
	            show_progress=True
	        )
	        # Generating feeds the same result to the player and the download slot.
	        generate_btn.click(
	            fn=tts_interface,
	            inputs=[text_input, voice_dropdown, rate_slider, pitch_slider],
	            outputs=[audio_output, download_output, warning_md],
	            show_progress=True
	        )
	        # The order of outputs must match the tuple returned by reset_fields.
	        reset_btn.click(
	            fn=reset_fields,
	            inputs=[],
	            outputs=[file_input, text_input, voice_dropdown, rate_slider, pitch_slider, audio_output, download_output, warning_md]
	        )

	    return demo

	async def main():
	    """Build the demo, configure its queue and launch it."""
	    app = await create_demo()
	    # Queue settings are applied before launching.
	    app.queue(default_concurrency_limit=50)
	    app.launch(show_api=False)

	# Script entry point: start the app only when this file is run directly,
	# not when it is imported.
	if __name__ == "__main__":
	    asyncio.run(main())