import asyncio
import functools
import os
import tempfile

import edge_tts
import gradio as gr
import pyarabic.araby as araby
from gradio_client import Client
| |
|
# Voice catalogue used by the UI and by text_to_speech_edge.
#
# Shape: {language display name: {speaker display name: Edge TTS voice id}}.
# The outer keys fill the "Languages" dropdown, the inner keys fill the
# "Speakers" dropdown, and the inner value is the voice string handed to
# edge_tts.Communicate.
#
# Speaker display names are dictionary keys, so they must be unique within a
# language even when the voices come from different locales.
#
# NOTE(review): the "Mandarin" group also lists zh-HK and zh-TW voices; the
# zh-HK voices are presumably Cantonese rather than Mandarin -- confirm before
# relying on the label.
language_dict = {
    "English": {
        "Jenny": "en-US-JennyNeural",
        "Guy": "en-US-GuyNeural",
        "Ana": "en-US-AnaNeural",
        "Aria": "en-US-AriaNeural",
        "Christopher": "en-US-ChristopherNeural",
        "Eric": "en-US-EricNeural",
        "Michelle": "en-US-MichelleNeural",
        "Roger": "en-US-RogerNeural",
        "Natasha": "en-AU-NatashaNeural",
        "William": "en-AU-WilliamNeural",
        "Clara": "en-CA-ClaraNeural",
        "Liam": "en-CA-LiamNeural",
        "Libby": "en-GB-LibbyNeural",
        "Maisie": "en-GB-MaisieNeural",
        "Ryan": "en-GB-RyanNeural",
        "Sonia": "en-GB-SoniaNeural",
        "Thomas": "en-GB-ThomasNeural",
        "Sam": "en-HK-SamNeural",
        "Yan": "en-HK-YanNeural",
        "Connor": "en-IE-ConnorNeural",
        "Emily": "en-IE-EmilyNeural",
        "Neerja": "en-IN-NeerjaNeural",
        "Prabhat": "en-IN-PrabhatNeural",
        "Asilia": "en-KE-AsiliaNeural",
        "Chilemba": "en-KE-ChilembaNeural",
        "Abeo": "en-NG-AbeoNeural",
        "Ezinne": "en-NG-EzinneNeural",
        "Mitchell": "en-NZ-MitchellNeural",
        "James": "en-PH-JamesNeural",
        "Rosa": "en-PH-RosaNeural",
        "Luna": "en-SG-LunaNeural",
        "Wayne": "en-SG-WayneNeural",
        "Elimu": "en-TZ-ElimuNeural",
        "Imani": "en-TZ-ImaniNeural",
        "Leah": "en-ZA-LeahNeural",
        "Luke": "en-ZA-LukeNeural"
    },
    "Spanish": {
        "Elena": "es-AR-ElenaNeural",
        "Tomas": "es-AR-TomasNeural",
        "Marcelo": "es-BO-MarceloNeural",
        "Sofia": "es-BO-SofiaNeural",
        "Gonzalo": "es-CO-GonzaloNeural",
        "Salome": "es-CO-SalomeNeural",
        "Juan": "es-CR-JuanNeural",
        "Maria": "es-CR-MariaNeural",
        "Belkys": "es-CU-BelkysNeural",
        "Emilio": "es-DO-EmilioNeural",
        "Ramona": "es-DO-RamonaNeural",
        "Andrea": "es-EC-AndreaNeural",
        "Luis": "es-EC-LuisNeural",
        "Alvaro": "es-ES-AlvaroNeural",
        "Elvira": "es-ES-ElviraNeural",
        "Teresa": "es-GQ-TeresaNeural",
        "Andres": "es-GT-AndresNeural",
        "Marta": "es-GT-MartaNeural",
        "Carlos": "es-HN-CarlosNeural",
        "Karla": "es-HN-KarlaNeural",
        "Federico": "es-NI-FedericoNeural",
        "Yolanda": "es-NI-YolandaNeural",
        "Margarita": "es-PA-MargaritaNeural",
        "Roberto": "es-PA-RobertoNeural",
        "Alex": "es-PE-AlexNeural",
        "Camila": "es-PE-CamilaNeural",
        "Karina": "es-PR-KarinaNeural",
        "Victor": "es-PR-VictorNeural",
        "Mario": "es-PY-MarioNeural",
        "Tania": "es-PY-TaniaNeural",
        "Lorena": "es-SV-LorenaNeural",
        "Rodrigo": "es-SV-RodrigoNeural",
        "Alonso": "es-US-AlonsoNeural",
        "Paloma": "es-US-PalomaNeural",
        "Mateo": "es-UY-MateoNeural",
        "Valentina": "es-UY-ValentinaNeural",
        "Paola": "es-VE-PaolaNeural",
        "Sebastian": "es-VE-SebastianNeural"
    },
    "Arabic": {
        "Hamed": "ar-SA-HamedNeural",
        "Zariyah": "ar-SA-ZariyahNeural",
        "Fatima": "ar-AE-FatimaNeural",
        "Hamdan": "ar-AE-HamdanNeural",
        "Ali": "ar-BH-AliNeural",
        "Laila": "ar-BH-LailaNeural",
        "Ismael": "ar-DZ-IsmaelNeural",
        "Salma": "ar-EG-SalmaNeural",
        "Shakir": "ar-EG-ShakirNeural",
        "Bassel": "ar-IQ-BasselNeural",
        "Rana": "ar-IQ-RanaNeural",
        "Sana": "ar-JO-SanaNeural",
        "Taim": "ar-JO-TaimNeural",
        "Fahed": "ar-KW-FahedNeural",
        "Noura": "ar-KW-NouraNeural",
        "Layla": "ar-LB-LaylaNeural",
        "Rami": "ar-LB-RamiNeural",
        "Iman": "ar-LY-ImanNeural",
        "Omar": "ar-LY-OmarNeural",
        "Jamal": "ar-MA-JamalNeural",
        "Mouna": "ar-MA-MounaNeural",
        "Abdullah": "ar-OM-AbdullahNeural",
        "Aysha": "ar-OM-AyshaNeural",
        "Amal": "ar-QA-AmalNeural",
        "Moaz": "ar-QA-MoazNeural",
        "Amany": "ar-SY-AmanyNeural",
        "Laith": "ar-SY-LaithNeural",
        "Hedi": "ar-TN-HediNeural",
        "Reem": "ar-TN-ReemNeural",
        "Maryam": "ar-YE-MaryamNeural",
        "Saleh": "ar-YE-SalehNeural"
    },
    "Korean": {
        "Sun-Hi": "ko-KR-SunHiNeural",
        "InJoon": "ko-KR-InJoonNeural"
    },
    "Thai": {
        "Premwadee": "th-TH-PremwadeeNeural",
        "Niwat": "th-TH-NiwatNeural"
    },
    "Vietnamese": {
        "HoaiMy": "vi-VN-HoaiMyNeural",
        "NamMinh": "vi-VN-NamMinhNeural"
    },
    "Japanese": {
        "Nanami": "ja-JP-NanamiNeural",
        "Keita": "ja-JP-KeitaNeural"
    },
    "French": {
        "Denise": "fr-FR-DeniseNeural",
        "Eloise": "fr-FR-EloiseNeural",
        "Henri": "fr-FR-HenriNeural",
        "Sylvie": "fr-CA-SylvieNeural",
        "Antoine": "fr-CA-AntoineNeural",
        "Jean": "fr-CA-JeanNeural",
        "Ariane": "fr-CH-ArianeNeural",
        "Fabrice": "fr-CH-FabriceNeural",
        "Charline": "fr-BE-CharlineNeural",
        "Gerard": "fr-BE-GerardNeural"
    },
    "Portuguese": {
        "Francisca": "pt-BR-FranciscaNeural",
        "Antonio": "pt-BR-AntonioNeural",
        "Duarte": "pt-PT-DuarteNeural",
        "Raquel": "pt-PT-RaquelNeural"
    },
    "Indonesian": {
        "Ardi": "id-ID-ArdiNeural",
        "Gadis": "id-ID-GadisNeural"
    },
    "Hebrew": {
        "Avri": "he-IL-AvriNeural",
        "Hila": "he-IL-HilaNeural"
    },
    "Italian": {
        "Isabella": "it-IT-IsabellaNeural",
        "Diego": "it-IT-DiegoNeural",
        "Elsa": "it-IT-ElsaNeural"
    },
    "Dutch": {
        "Colette": "nl-NL-ColetteNeural",
        "Fenna": "nl-NL-FennaNeural",
        "Maarten": "nl-NL-MaartenNeural",
        "Arnaud": "nl-BE-ArnaudNeural",
        "Dena": "nl-BE-DenaNeural"
    },
    "Malay": {
        "Osman": "ms-MY-OsmanNeural",
        "Yasmin": "ms-MY-YasminNeural"
    },
    "Norwegian": {
        "Pernille": "nb-NO-PernilleNeural",
        "Finn": "nb-NO-FinnNeural"
    },
    "Swedish": {
        "Sofie": "sv-SE-SofieNeural",
        "Mattias": "sv-SE-MattiasNeural"
    },
    "Greek": {
        "Athina": "el-GR-AthinaNeural",
        "Nestoras": "el-GR-NestorasNeural"
    },
    "German": {
        "Katja": "de-DE-KatjaNeural",
        "Amala": "de-DE-AmalaNeural",
        "Conrad": "de-DE-ConradNeural",
        "Killian": "de-DE-KillianNeural",
        "Ingrid": "de-AT-IngridNeural",
        "Jonas": "de-AT-JonasNeural",
        "Jan": "de-CH-JanNeural",
        "Leni": "de-CH-LeniNeural"
    },
    "Afrikaans": {
        "Adri": "af-ZA-AdriNeural",
        "Willem": "af-ZA-WillemNeural"
    },
    "Amharic": {
        "Ameha": "am-ET-AmehaNeural",
        "Mekdes": "am-ET-MekdesNeural"
    },
    "Azerbaijani": {
        "Babek": "az-AZ-BabekNeural",
        "Banu": "az-AZ-BanuNeural"
    },
    "Bulgarian": {
        "Borislav": "bg-BG-BorislavNeural",
        "Kalina": "bg-BG-KalinaNeural"
    },
    "Bengali": {
        "Nabanita": "bn-BD-NabanitaNeural",
        "Pradeep": "bn-BD-PradeepNeural",
        "Bashkar": "bn-IN-BashkarNeural",
        "Tanishaa": "bn-IN-TanishaaNeural"
    },
    "Bosnian": {
        "Goran": "bs-BA-GoranNeural",
        "Vesna": "bs-BA-VesnaNeural"
    },
    "Catalan": {
        "Joana": "ca-ES-JoanaNeural",
        "Enric": "ca-ES-EnricNeural"
    },
    "Czech": {
        "Antonin": "cs-CZ-AntoninNeural",
        "Vlasta": "cs-CZ-VlastaNeural"
    },
    "Welsh": {
        "Aled": "cy-GB-AledNeural",
        "Nia": "cy-GB-NiaNeural"
    },
    "Danish": {
        "Christel": "da-DK-ChristelNeural",
        "Jeppe": "da-DK-JeppeNeural"
    },
    "Estonian": {
        "Anu": "et-EE-AnuNeural",
        "Kert": "et-EE-KertNeural"
    },
    "Persian": {
        "Dilara": "fa-IR-DilaraNeural",
        "Farid": "fa-IR-FaridNeural"
    },
    "Finnish": {
        "Harri": "fi-FI-HarriNeural",
        "Noora": "fi-FI-NooraNeural"
    },
    "Irish": {
        "Colm": "ga-IE-ColmNeural",
        "Orla": "ga-IE-OrlaNeural"
    },
    "Galician": {
        "Roi": "gl-ES-RoiNeural",
        "Sabela": "gl-ES-SabelaNeural"
    },
    "Gujarati": {
        "Dhwani": "gu-IN-DhwaniNeural",
        "Niranjan": "gu-IN-NiranjanNeural"
    },
    "Hindi": {
        "Madhur": "hi-IN-MadhurNeural",
        "Swara": "hi-IN-SwaraNeural"
    },
    "Croatian": {
        "Gabrijela": "hr-HR-GabrijelaNeural",
        "Srecko": "hr-HR-SreckoNeural"
    },
    "Hungarian": {
        "Noemi": "hu-HU-NoemiNeural",
        "Tamas": "hu-HU-TamasNeural"
    },
    "Icelandic": {
        "Gudrun": "is-IS-GudrunNeural",
        "Gunnar": "is-IS-GunnarNeural"
    },
    "Javanese": {
        "Dimas": "jv-ID-DimasNeural",
        "Siti": "jv-ID-SitiNeural"
    },
    "Georgian": {
        "Eka": "ka-GE-EkaNeural",
        "Giorgi": "ka-GE-GiorgiNeural"
    },
    "Kazakh": {
        "Aigul": "kk-KZ-AigulNeural",
        "Daulet": "kk-KZ-DauletNeural"
    },
    "Khmer": {
        "Piseth": "km-KH-PisethNeural",
        "Sreymom": "km-KH-SreymomNeural"
    },
    "Kannada": {
        "Gagan": "kn-IN-GaganNeural",
        "Sapna": "kn-IN-SapnaNeural"
    },
    "Lao": {
        "Chanthavong": "lo-LA-ChanthavongNeural",
        "Keomany": "lo-LA-KeomanyNeural"
    },
    "Lithuanian": {
        "Leonas": "lt-LT-LeonasNeural",
        "Ona": "lt-LT-OnaNeural"
    },
    "Latvian": {
        "Everita": "lv-LV-EveritaNeural",
        "Nils": "lv-LV-NilsNeural"
    },
    "Macedonian": {
        "Aleksandar": "mk-MK-AleksandarNeural",
        "Marija": "mk-MK-MarijaNeural"
    },
    "Malayalam": {
        "Midhun": "ml-IN-MidhunNeural",
        "Sobhana": "ml-IN-SobhanaNeural"
    },
    "Mongolian": {
        "Bataa": "mn-MN-BataaNeural",
        "Yesui": "mn-MN-YesuiNeural"
    },
    "Marathi": {
        "Aarohi": "mr-IN-AarohiNeural",
        "Manohar": "mr-IN-ManoharNeural"
    },
    "Maltese": {
        "Grace": "mt-MT-GraceNeural",
        "Joseph": "mt-MT-JosephNeural"
    },
    "Burmese": {
        "Nilar": "my-MM-NilarNeural",
        "Thiha": "my-MM-ThihaNeural"
    },
    "Nepali": {
        "Hemkala": "ne-NP-HemkalaNeural",
        "Sagar": "ne-NP-SagarNeural"
    },
    "Polish": {
        "Marek": "pl-PL-MarekNeural",
        "Zofia": "pl-PL-ZofiaNeural"
    },
    "Pashto": {
        "Gul Nawaz": "ps-AF-GulNawazNeural",
        "Latifa": "ps-AF-LatifaNeural"
    },
    "Romanian": {
        "Alina": "ro-RO-AlinaNeural",
        "Emil": "ro-RO-EmilNeural"
    },
    "Russian": {
        "Svetlana": "ru-RU-SvetlanaNeural",
        "Dmitry": "ru-RU-DmitryNeural"
    },
    "Sinhala": {
        "Sameera": "si-LK-SameeraNeural",
        "Thilini": "si-LK-ThiliniNeural"
    },
    "Slovak": {
        "Lukas": "sk-SK-LukasNeural",
        "Viktoria": "sk-SK-ViktoriaNeural"
    },
    "Slovenian": {
        "Petra": "sl-SI-PetraNeural",
        "Rok": "sl-SI-RokNeural"
    },
    "Somali": {
        "Muuse": "so-SO-MuuseNeural",
        "Ubax": "so-SO-UbaxNeural"
    },
    "Albanian": {
        "Anila": "sq-AL-AnilaNeural",
        "Ilir": "sq-AL-IlirNeural"
    },
    "Serbian": {
        "Nicholas": "sr-RS-NicholasNeural",
        "Sophie": "sr-RS-SophieNeural"
    },
    "Sundanese": {
        "Jajang": "su-ID-JajangNeural",
        "Tuti": "su-ID-TutiNeural"
    },
    "Swahili": {
        "Rafiki": "sw-KE-RafikiNeural",
        "Zuri": "sw-KE-ZuriNeural",
        "Daudi": "sw-TZ-DaudiNeural",
        "Rehema": "sw-TZ-RehemaNeural"
    },
    "Tamil": {
        "Pallavi": "ta-IN-PallaviNeural",
        "Valluvar": "ta-IN-ValluvarNeural",
        "Kumar": "ta-LK-KumarNeural",
        "Saranya": "ta-LK-SaranyaNeural",
        "Kani": "ta-MY-KaniNeural",
        "Surya": "ta-MY-SuryaNeural",
        "Anbu": "ta-SG-AnbuNeural"
    },
    "Telugu": {
        "Mohan": "te-IN-MohanNeural",
        "Shruti": "te-IN-ShrutiNeural"
    },
    "Turkish": {
        "Ahmet": "tr-TR-AhmetNeural",
        "Emel": "tr-TR-EmelNeural"
    },
    "Ukrainian": {
        "Ostap": "uk-UA-OstapNeural",
        "Polina": "uk-UA-PolinaNeural"
    },
    "Urdu": {
        "Gul": "ur-IN-GulNeural",
        "Salman": "ur-IN-SalmanNeural",
        "Asad": "ur-PK-AsadNeural",
        "Uzma": "ur-PK-UzmaNeural"
    },
    "Uzbek": {
        "Madina": "uz-UZ-MadinaNeural",
        "Sardor": "uz-UZ-SardorNeural"
    },
    "Mandarin": {
        "Xiaoxiao": "zh-CN-XiaoxiaoNeural",
        "Yunyang": "zh-CN-YunyangNeural",
        "Yunxi": "zh-CN-YunxiNeural",
        "Xiaoyi": "zh-CN-XiaoyiNeural",
        "Yunjian": "zh-CN-YunjianNeural",
        "Yunxia": "zh-CN-YunxiaNeural",
        "Xiaobei": "zh-CN-liaoning-XiaobeiNeural",
        "Xiaoni": "zh-CN-shaanxi-XiaoniNeural",
        "HiuMaan": "zh-HK-HiuMaanNeural",
        "HiuGaai": "zh-HK-HiuGaaiNeural",
        "WanLung": "zh-HK-WanLungNeural",
        "HsiaoChen": "zh-TW-HsiaoChenNeural",
        "HsiaoYu": "zh-TW-HsiaoYuNeural",
        "YunJhe": "zh-TW-YunJheNeural"
    },
    "Zulu": {
        "Thando": "zu-ZA-ThandoNeural",
        "Themba": "zu-ZA-ThembaNeural"
    }
}
| |
|
# gradio_client handle for the "MohamedRashad/arabic-auto-tashkeel" Space.
# It is created once at import time and reused by text_to_speech_edge, which
# calls its "/infer_shakkala" endpoint when the Tashkeel option is enabled.
# NOTE(review): presumably the endpoint returns the input text with Arabic
# diacritics added -- confirm against the Space's API page.
client = Client("MohamedRashad/arabic-auto-tashkeel")
| |
|
async def text_to_speech_edge(text, language_code, speaker, tashkeel_checkbox=False):
    """Synthesize ``text`` with Edge TTS and return the spoken text and an MP3 path.

    Args:
        text: The text to convert to speech.
        language_code: Display name of the language, i.e. a top-level key of
            ``language_dict`` such as ``"Arabic"`` (not a locale code).
        speaker: Display name of the voice within that language.
        tashkeel_checkbox: When true and the language is ``"Arabic"``, the
            text is stripped of diacritics, sent to the remote
            ``/infer_shakkala`` endpoint, and the endpoint's result is what
            gets spoken and returned.

    Returns:
        A ``(text, path)`` tuple: the text that was actually synthesized and
        the path of a temporary ``.mp3`` file holding the audio. The file is
        not deleted here because the caller still has to read it.

    Raises:
        gr.Error: If ``text`` is empty/blank, or if the language/speaker pair
            is missing or not present in ``language_dict``.
    """
    if not text or not text.strip():
        raise gr.Error("Please enter some text to convert to speech.")

    # Resolve the voice before any remote call so a missing selection (both
    # dropdowns start empty) fails fast with a readable message.
    try:
        voice = language_dict[language_code][speaker]
    except (KeyError, TypeError):
        raise gr.Error("Please select a language and a speaker first.") from None

    if language_code == "Arabic" and tashkeel_checkbox:
        # Client.predict blocks on a network round trip; run it in the default
        # executor so this coroutine does not stall the event loop.
        loop = asyncio.get_running_loop()
        text = await loop.run_in_executor(
            None,
            functools.partial(
                client.predict,
                input_text=araby.strip_diacritics(text),
                api_name="/infer_shakkala",
            ),
        )

    communicate = edge_tts.Communicate(text, voice)

    # Only a unique path is needed; close the handle before edge_tts writes to it.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
        tmp_path = tmp_file.name

    try:
        await communicate.save(tmp_path)
    except BaseException:
        # delete=False means nothing else will remove the file if synthesis
        # fails or is cancelled, so clean it up here (best effort) and re-raise.
        try:
            os.remove(tmp_path)
        except OSError:
            pass
        raise

    return text, tmp_path
| |
|
| |
|
def get_speakers(language):
    """Build the component updates that follow a change of language.

    Args:
        language: The selected language display name (a key of
            ``language_dict``). May be ``None`` or unknown, e.g. when the
            dropdown is cleared.

    Returns:
        A ``(speaker_dropdown, tashkeel_checkbox)`` tuple of Gradio
        components used as updates. For a known language the dropdown lists
        that language's speakers with the first one selected, and the
        checkbox is visible only for ``"Arabic"``. For a missing or unknown
        language both are reset to their initial state (empty, disabled
        dropdown; hidden checkbox) instead of raising ``KeyError``.
    """
    print(language)

    voices = language_dict.get(language) if isinstance(language, str) else None
    if not voices:
        return (
            gr.Dropdown(choices=[], value=None, interactive=False),
            gr.Checkbox(visible=False, interactive=False),
        )

    speakers = list(voices)
    return (
        gr.Dropdown(choices=speakers, value=speakers[0], interactive=True),
        gr.Checkbox(visible=language == "Arabic", interactive=True),
    )
| |
|
| |
|
# Nothing is pre-selected: the speaker dropdown stays empty and disabled until
# a language is chosen and get_speakers fills it in.
default_language = None
default_speaker = None

with gr.Blocks(title="Multilingual TTS") as demo:
    # Header: title, number of supported languages and their names.
    gr.HTML("<center><h1>Multilingual TTS (Edge TTS)</h1></center>")
    gr.HTML(f"<h2 style='color:Tomato;'> {len(language_dict)} languages supported</h2>")
    gr.HTML(f"<p> {', '.join(language_dict)} </p>")
    gr.Markdown("**Note:** A special feature is added for Arabic language only.")

    with gr.Row():
        # Left column: inputs.
        with gr.Column():
            input_text = gr.Textbox(lines=5, label="Input Text", placeholder="Enter text to convert to speech")
            language = gr.Dropdown(
                choices=list(language_dict), value=default_language, label="Languages", interactive=True
            )
            speaker = gr.Dropdown(choices=[], value=default_speaker, label="Speakers", interactive=False)
            # Hidden until "Arabic" is selected (see get_speakers).
            tashkeel_checkbox = gr.Checkbox(label="Tashkeel", value=False, visible=False, interactive=False)
            run_btn = gr.Button(value="Generate Audio", variant="primary")

        # Right column: the text that was spoken and the generated audio.
        with gr.Column():
            output_text = gr.Textbox(label="Output Text")
            output_audio = gr.Audio(type="filepath", label="Audio Output")

    # Choosing a language refreshes the speaker list and the Tashkeel toggle.
    language.change(get_speakers, inputs=[language], outputs=[speaker, tashkeel_checkbox])
    run_btn.click(
        text_to_speech_edge,
        inputs=[input_text, language, speaker, tashkeel_checkbox],
        outputs=[output_text, output_audio],
    )


if __name__ == "__main__":
    # share=True asks Gradio to expose the app through a public share link.
    demo.queue().launch(share=True)
| |
|