Spaces:

jafrilalam
/

Edge_TTS

Running

File size: 9,298 Bytes

import gradio as gr
import edge_tts
import asyncio
import tempfile
import os
import time
import fitz 
import ebooklib
from ebooklib import epub
from bs4 import BeautifulSoup

async def get_voices():
    """Return the selectable voices as a ``{display label: ShortName}`` dict.

    The voice list is hard-coded in this function (a mock list; nothing is
    fetched here). Each label has the form
    ``"<ShortName> - <Locale> (<Gender>)"`` and maps to the bare short name.
    """
    # (short name, locale, gender) for every voice offered in the UI.
    catalogue = (
        ("bn-IN-TanishaaNeural", "bn-IN", "Female"),
        ("bn-IN-BashkarNeural", "bn-IN", "Male"),
        ("bn-BD-PradeepNeural", "bn-BD", "Male"),
        ("bn-BD-NabanitaNeural", "bn-BD", "Female"),
    )
    labelled = {}
    for short_name, locale, gender in catalogue:
        labelled[f"{short_name} - {locale} ({gender})"] = short_name
    return labelled

def extract_text_from_file(file):
    """Extract plain text from an uploaded PDF, EPUB or TXT file.

    Args:
        file: The uploaded file. Either a filesystem path (``str`` or
            ``os.PathLike``) or an object exposing the path through a
            ``name`` attribute. ``None`` means nothing was uploaded.

    Returns:
        A ``(text, error)`` tuple. On success ``text`` is the extracted,
        stripped text and ``error`` is ``None``. On failure ``text`` is
        ``None`` and ``error`` is a user-facing message (missing file,
        unsupported extension, or the text of the exception raised while
        reading/parsing).
    """
    if file is None:
        return None, "কোনো ফাইল আপলোড করা হয়নি"

    # Accept a plain path as well as a wrapper object carrying ``.name``.
    # Path-likes are checked first because ``pathlib.Path.name`` is only the
    # final path component, not the full path.
    if isinstance(file, (str, os.PathLike)):
        file_path = os.fspath(file)
    else:
        file_path = file.name
    file_ext = os.path.splitext(file_path)[1].lower()

    try:
        if file_ext == ".pdf":
            doc = fitz.open(file_path)
            try:
                text = "".join(page.get_text("text") for page in doc)
            finally:
                # Release the document even if a page fails to extract.
                doc.close()
            return text.strip(), None
        elif file_ext == ".epub":
            book = epub.read_epub(file_path)
            fragments = []
            for item in book.get_items_of_type(ebooklib.ITEM_DOCUMENT):
                content = item.get_content().decode("utf-8")
                soup = BeautifulSoup(content, "lxml")
                # ``string=True`` is the current spelling of the deprecated
                # ``text=True`` filter: it yields every text node.
                for element in soup.find_all(string=True):
                    stripped = element.strip()
                    if stripped:
                        fragments.append(stripped)
            # One non-empty text node per line.
            return "\n".join(fragments), None
        elif file_ext == ".txt":
            # ``utf-8-sig`` reads plain UTF-8 and also drops a leading BOM,
            # which ``str.strip()`` would otherwise leave in the text.
            with open(file_path, "r", encoding="utf-8-sig") as f:
                text = f.read()
            return text.strip(), None
        else:
            return None, "অসমর্থিত ফাইল ফরম্যাট। শুধুমাত্র PDF, EPUB, এবং TXT ফাইল সমর্থিত।"
    except Exception as e:
        # UI boundary: report any read/parse failure as a message instead of
        # letting it propagate to the caller.
        return None, f"টেক্সট নিষ্কাশনে ত্রুটি: {str(e)}"

async def text_to_speech(text, voice, rate, pitch):
    """Synthesize ``text`` into an MP3 file using Edge TTS.

    Args:
        text: The text to speak. ``None``, empty or whitespace-only text is
            rejected with a message.
        voice: Voice label of the form ``"<ShortName> - ..."``; only the part
            before the first ``" - "`` is passed to Edge TTS. An empty or
            ``None`` value is rejected with a message.
        rate: Speaking-rate offset in percent, formatted as e.g. ``"+10%"``.
        pitch: Pitch offset in hertz, formatted as e.g. ``"-5Hz"``.

    Returns:
        ``(path, None)`` with the path of the generated MP3 on success, or
        ``(None, message)`` when the input validation fails.

    Raises:
        Any exception raised by ``edge_tts`` while synthesizing; the
        temporary file is removed before the exception propagates.
    """
    if not text or not text.strip():
        return None, "বাংলা লেখা সংযুক্ত করুন"
    if not voice:
        return None, "বাচনভঙ্গি এবং কণ্ঠস্বর নির্বাচন করুন"

    voice_short_name = voice.split(" - ")[0]
    # The ``d`` format code rejects floats, so coerce numeric slider values
    # to int before formatting the signed offsets.
    rate_str = f"{int(rate):+d}%"
    pitch_str = f"{int(pitch):+d}Hz"
    communicate = edge_tts.Communicate(text, voice_short_name, rate=rate_str, pitch=pitch_str)

    # Reserve a unique, timestamped .mp3 path. The handle is closed before
    # edge_tts writes to the path so the file is never open twice.
    timestamp = time.strftime("%Y%m%d_%H%M%S")
    with tempfile.NamedTemporaryFile(delete=False, suffix=f"_tts_{timestamp}.mp3") as tmp_file:
        tmp_path = tmp_file.name

    try:
        await communicate.save(tmp_path)
    except BaseException:
        # Covers task cancellation too: do not leave an empty or partial
        # file behind when synthesis does not complete.
        try:
            os.remove(tmp_path)
        except OSError:
            pass
        raise

    return tmp_path, None

async def tts_interface(text, voice, rate, pitch):
    """Click handler: run text-to-speech and return values for three outputs.

    Returns a 3-tuple. On success the generated audio path is returned twice
    (once per output slot) followed by ``None``. When ``text_to_speech``
    reports a warning, the second slot is ``None`` and the third slot is
    whatever ``gr.Warning(warning)`` returns.
    """
    audio, warning = await text_to_speech(text, voice, rate, pitch)
    if not warning:
        return audio, audio, None

    # NOTE(review): gr.Warning is invoked for its effect and its return value
    # is forwarded as the third output; presumably it shows a notification
    # and returns None. Confirm against the installed Gradio version.
    notice = gr.Warning(warning)
    return audio, None, notice

async def handle_file_upload(file):
    """File-change handler: extract the file's text for two outputs.

    Returns ``(text, None)`` when extraction succeeds. When
    ``extract_text_from_file`` reports a warning, returns ``None`` for the
    text together with whatever ``gr.Warning(warning)`` returns.
    """
    text, warning = extract_text_from_file(file)
    if not warning:
        return text, None

    # NOTE(review): gr.Warning is invoked for its effect; its return value
    # (presumably None, to be confirmed) becomes the second output.
    notice = gr.Warning(warning)
    return None, notice

def reset_fields():
    """Return the blank/default values used to reset the form.

    The 8-tuple is ordered as: file, text, voice, rate, pitch, audio,
    download, warning.
    """
    no_file = no_audio = no_download = None
    blank_text = blank_voice = blank_warning = ""
    default_rate = default_pitch = 0
    return (
        no_file,
        blank_text,
        blank_voice,
        default_rate,
        default_pitch,
        no_audio,
        no_download,
        blank_warning,
    )

async def create_demo():
    """Build and return the Gradio ``Blocks`` UI for the text-to-speech app.

    The layout has an input group (file upload, text box, voice dropdown,
    rate and pitch sliders, generate/reset buttons) and an output group
    (audio player, download file, hidden warning Markdown). The three event
    handlers are wired up before the ``Blocks`` object is returned; nothing
    is launched here.
    """
    # Mapping of display label -> voice short name; only the labels (keys)
    # are used below, as dropdown choices.
    voices = await get_voices()

    # Custom stylesheet passed to gr.Blocks: page font/background, button
    # gradients, and rounded/shadowed inputs, groups and outputs.
    css = """
    body {
        font-family: 'Noto Sans Bengali', sans-serif;
        background: linear-gradient(135deg, #e0f7fa, #b2ebf2);
    }
    .gr-button-primary {
        background: linear-gradient(45deg, #0288d1, #4fc3f7) !important;
        border: none !important;
        color: white !important;
        padding: 12px 24px !important;
        border-radius: 8px !important;
        font-weight: bold !important;
        transition: all 0.3s ease !important;
    }
    .gr-button-primary:hover {
        background: linear-gradient(45deg, #0277bd, #29b6f6) !important;
        box-shadow: 0 4px 12px rgba(0, 0, 0, 0.2) !important;
    }
    .gr-button-secondary {
        background: linear-gradient(45deg, #e57373, #f06292) !important;
        border: none !important;
        color: white !important;
        padding: 12px 24px !important;
        border-radius: 8px !important;
        font-weight: bold !important;
        transition: all 0.3s ease !important;
    }
    .gr-button-secondary:hover {
        background: linear-gradient(45deg, #d32f2f, #e91e63) !important;
        box-shadow: 0 4px 12px rgba(0, 0, 0, 0.2) !important;
    }
    .gr-textbox, .gr-dropdown, .gr-slider, .gr-file {
        border-radius: 8px !important;
        border: 1px solid #b0bec5 !important;
        box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1) !important;
    }
    .gr-group {
        background: white !important;
        border-radius: 12px !important;
        box-shadow: 0 4px 16px rgba(0, 0, 0, 0.1) !important;
        padding: 20px !important;
        margin-bottom: 20px !important;
    }
    .gr-markdown h1, .gr-markdown h2, .gr-markdown h3 {
        color: #01579b !important;
    }
    .gr-audio, .gr-file {
        border-radius: 8px !important;
        background: #f5f5f5 !important;
        padding: 10px !important;
    }
    .container {
        max-width: 800px !important;
        margin: auto !important;
    }
    """

    with gr.Blocks(css=css, analytics_enabled=False) as demo:
        # Page title and short description (tagged with the "container" class).
        gr.Markdown(
            """
            # 🎙️ Edge TTS Text-to-Speech
            লেখা বা ফাইল (PDF, EPUB, TXT) থেকে উচ্চ-মানের কণ্ঠস্বরে রূপান্তর করুন। বাংলা ভাষায় স্বাভাবিক এবং সুন্দর কণ্ঠস্বর উপভোগ করুন।
            """,
            elem_classes=["container"]
        )

        # Input group: source file/text, voice choice and prosody controls.
        with gr.Group():
            gr.Markdown("### ফাইল আপলোড এবং লেখা ইনপুট")
            # Single-file upload restricted to the three extensions that
            # extract_text_from_file handles.
            file_input = gr.File(
                label="ফাইল আপলোড করুন (PDF, EPUB, TXT)",
                file_types=[".pdf", ".epub", ".txt"],
                file_count="single"
            )
            text_input = gr.Textbox(
                label="প্রদত্ত লেখা",
                lines=5,
                placeholder="এখানে আপনার বাংলা লেখা লিখুন বা ফাইল আপলোড করুন, যেমন: 'আমি বাংলায় কথা বলি।'",
                show_copy_button=True
            )
            # The leading "" choice is the initial value, i.e. "no voice
            # selected"; text_to_speech rejects an empty voice.
            voice_dropdown = gr.Dropdown(
                choices=[""] + list(voices.keys()),
                label="বাচনভঙ্গি এবং কণ্ঠস্বর",
                value="",
                info="একটি কণ্ঠস্বর নির্বাচন করুন।"
            )
            # Rate offset in percent: -50..+50 in steps of 1, default 0.
            rate_slider = gr.Slider(
                minimum=-50,
                maximum=50,
                value=0,
                label="Speech Rate Adjustment (%)",
                step=1,
                info="কথার গতি সামঞ্জস্য করুন: 0% ডিফল্ট, +50% দ্রুত, -50% ধীর।"
            )
            # Pitch offset in Hz: -20..+20 in steps of 1, default 0.
            pitch_slider = gr.Slider(
                minimum=-20,
                maximum=20,
                value=0,
                label="Pitch Adjustment (Hz)",
                step=1,
                info="কণ্ঠের স্বর সামঞ্জস্য করুন: 0 Hz ডিফল্ট, +20 Hz উচ্চ, -20 Hz নিম্ন।"
            )

            with gr.Row():
                generate_btn = gr.Button("লেখা থেকে কণ্ঠস্বরে রূপান্তর করুন", variant="primary")
                reset_btn = gr.Button("রিসেট করুন", variant="secondary")

        # Output group: playable audio, downloadable file, and a warning slot.
        with gr.Group():
            gr.Markdown("### আউটপুট")
            audio_output = gr.Audio(label="Generated Audio", type="filepath")
            download_output = gr.File(label="অডিও ফাইল ডাউনলোড করুন")
            # Created hidden (visible=False); it serves as the last output
            # slot of every handler below.
            warning_md = gr.Markdown(label="Warning", visible=False)

        # Event wiring.
        # A change of the uploaded file fills the text box with its text.
        file_input.change(
            fn=handle_file_upload,
            inputs=[file_input],
            outputs=[text_input, warning_md],
            show_progress=True
        )
        # Generate: synthesize the text and feed the player and download slots.
        generate_btn.click(
            fn=tts_interface,
            inputs=[text_input, voice_dropdown, rate_slider, pitch_slider],
            outputs=[audio_output, download_output, warning_md],
            show_progress=True
        )
        # Reset: the outputs order must match the tuple returned by reset_fields.
        reset_btn.click(
            fn=reset_fields,
            inputs=[],
            outputs=[file_input, text_input, voice_dropdown, rate_slider, pitch_slider, audio_output, download_output, warning_md]
        )

    return demo

async def main():
    """Build the UI, configure its queue, and launch the app.

    The queue is set up with a default concurrency limit of 50 and the app
    is launched with ``show_api=False``.
    """
    app = await create_demo()
    app.queue(default_concurrency_limit=50)
    app.launch(show_api=False)

# Script entry point: when run directly (not imported), drive the async
# main() coroutine with asyncio.run().
if __name__ == "__main__":
    asyncio.run(main())