Spaces:
Running
Running
Upload 3 files
Browse files- lang_data.py +215 -0
- microsoft_tts.py +340 -0
- requirements.txt +6 -0
lang_data.py
ADDED
|
@@ -0,0 +1,215 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Maps a language display name to its short language code.
# The key set matches the keys of the male/female voice tables in this file.
languages = {
    "Afrikaans": "af",
    "Amharic": "am",
    "Arabic": "ar",
    "Azerbaijani": "az",
    "Bulgarian": "bg",
    "Bengali": "bn",
    "Bosnian": "bs",
    "Catalan": "ca",
    "Czech": "cs",
    "Welsh": "cy",
    "Danish": "da",
    "German": "de",
    "Greek": "el",
    "English": "en",
    "Spanish": "es",
    "French": "fr",
    "Irish": "ga",
    "Galician": "gl",
    "Gujarati": "gu",
    "Hebrew": "he",
    "Hindi": "hi",
    "Croatian": "hr",
    "Hungarian": "hu",
    "Indonesian": "id",
    "Icelandic": "is",
    "Italian": "it",
    "Japanese": "ja",
    "Javanese": "jv",
    "Georgian": "ka",
    "Kazakh": "kk",
    "Khmer": "km",
    "Kannada": "kn",
    "Korean": "ko",
    "Lao": "lo",
    "Lithuanian": "lt",
    "Latvian": "lv",
    "Macedonian": "mk",
    "Malayalam": "ml",
    "Mongolian": "mn",
    "Marathi": "mr",
    "Malay": "ms",
    "Maltese": "mt",
    "Burmese": "my",
    "Norwegian Bokmål": "nb",
    "Nepali": "ne",
    "Dutch": "nl",
    "Polish": "pl",
    "Pashto": "ps",
    "Portuguese": "pt",
    "Romanian": "ro",
    "Russian": "ru",
    "Sinhala": "si",
    "Slovak": "sk",
    "Slovenian": "sl",
    "Somali": "so",
    "Albanian": "sq",
    "Serbian": "sr",
    "Sundanese": "su",
    "Swedish": "sv",
    "Swahili": "sw",
    "Tamil": "ta",
    "Telugu": "te",
    "Thai": "th",
    "Turkish": "tr",
    "Ukrainian": "uk",
    "Urdu": "ur",
    "Uzbek": "uz",
    "Vietnamese": "vi",
    "Chinese": "zh",
    "Zulu": "zu",
}
|
| 73 |
+
|
| 74 |
+
|
| 75 |
+
|
| 76 |
+
# Female voice name for each language display name.
female_voice_list = {
    "Vietnamese": "vi-VN-HoaiMyNeural",
    "Bengali": "bn-BD-NabanitaNeural",
    "Thai": "th-TH-PremwadeeNeural",
    # Alternatives previously noted here: en-AU-NatashaNeural, en-IE-EmilyNeural
    "English": "en-US-AvaMultilingualNeural",
    "Portuguese": "pt-BR-FranciscaNeural",
    "Arabic": "ar-AE-FatimaNeural",
    "Turkish": "tr-TR-EmelNeural",
    "Spanish": "es-AR-ElenaNeural",
    "Korean": "ko-KR-SunHiNeural",
    "French": "fr-BE-CharlineNeural",
    "Indonesian": "id-ID-GadisNeural",
    "Russian": "ru-RU-SvetlanaNeural",
    "Hindi": "hi-IN-SwaraNeural",
    "Japanese": "ja-JP-NanamiNeural",
    "Afrikaans": "af-ZA-AdriNeural",
    "Amharic": "am-ET-MekdesNeural",
    "Azerbaijani": "az-AZ-BanuNeural",
    "Bulgarian": "bg-BG-KalinaNeural",
    "Bosnian": "bs-BA-VesnaNeural",
    "Catalan": "ca-ES-JoanaNeural",
    "Czech": "cs-CZ-VlastaNeural",
    "Welsh": "cy-GB-NiaNeural",
    "Danish": "da-DK-ChristelNeural",
    "German": "de-AT-IngridNeural",
    "Greek": "el-GR-AthinaNeural",
    "Irish": "ga-IE-OrlaNeural",
    "Galician": "gl-ES-SabelaNeural",
    "Gujarati": "gu-IN-DhwaniNeural",
    "Hebrew": "he-IL-HilaNeural",
    "Croatian": "hr-HR-GabrijelaNeural",
    "Hungarian": "hu-HU-NoemiNeural",
    "Icelandic": "is-IS-GudrunNeural",
    "Italian": "it-IT-ElsaNeural",
    "Javanese": "jv-ID-SitiNeural",
    "Georgian": "ka-GE-EkaNeural",
    "Kazakh": "kk-KZ-AigulNeural",
    "Khmer": "km-KH-SreymomNeural",
    "Kannada": "kn-IN-SapnaNeural",
    "Lao": "lo-LA-KeomanyNeural",
    "Lithuanian": "lt-LT-OnaNeural",
    "Latvian": "lv-LV-EveritaNeural",
    "Macedonian": "mk-MK-MarijaNeural",
    "Malayalam": "ml-IN-SobhanaNeural",
    "Mongolian": "mn-MN-YesuiNeural",
    "Marathi": "mr-IN-AarohiNeural",
    "Malay": "ms-MY-YasminNeural",
    "Maltese": "mt-MT-GraceNeural",
    "Burmese": "my-MM-NilarNeural",
    "Norwegian Bokmål": "nb-NO-PernilleNeural",
    "Nepali": "ne-NP-HemkalaNeural",
    "Dutch": "nl-BE-DenaNeural",
    "Polish": "pl-PL-ZofiaNeural",
    "Pashto": "ps-AF-LatifaNeural",
    "Romanian": "ro-RO-AlinaNeural",
    "Sinhala": "si-LK-ThiliniNeural",
    "Slovak": "sk-SK-ViktoriaNeural",
    "Slovenian": "sl-SI-PetraNeural",
    "Somali": "so-SO-UbaxNeural",
    "Albanian": "sq-AL-AnilaNeural",
    "Serbian": "sr-RS-SophieNeural",
    "Sundanese": "su-ID-TutiNeural",
    "Swedish": "sv-SE-SofieNeural",
    "Swahili": "sw-KE-ZuriNeural",
    "Tamil": "ta-IN-PallaviNeural",
    "Telugu": "te-IN-ShrutiNeural",
    "Chinese": "zh-CN-XiaoxiaoNeural",
    "Ukrainian": "uk-UA-PolinaNeural",
    "Urdu": "ur-IN-GulNeural",
    "Uzbek": "uz-UZ-MadinaNeural",
    "Zulu": "zu-ZA-ThandoNeural",
}
|
| 146 |
+
# Male voice name for each language display name.
male_voice_list = {
    "Vietnamese": "vi-VN-NamMinhNeural",
    "Bengali": "bn-BD-PradeepNeural",
    "Thai": "th-TH-NiwatNeural",
    # Alternative previously noted here: en-US-BrianNeural
    "English": "en-US-BrianMultilingualNeural",
    "Portuguese": "pt-BR-AntonioNeural",
    "Arabic": "ar-AE-HamdanNeural",
    "Turkish": "tr-TR-AhmetNeural",
    "Spanish": "es-AR-TomasNeural",
    "Korean": "ko-KR-HyunsuNeural",
    "French": "fr-BE-GerardNeural",
    "Indonesian": "id-ID-ArdiNeural",
    "Russian": "ru-RU-DmitryNeural",
    "Hindi": "hi-IN-MadhurNeural",
    "Japanese": "ja-JP-KeitaNeural",
    "Afrikaans": "af-ZA-WillemNeural",
    "Amharic": "am-ET-AmehaNeural",
    "Azerbaijani": "az-AZ-BabekNeural",
    "Bulgarian": "bg-BG-BorislavNeural",
    "Bosnian": "bs-BA-GoranNeural",
    "Catalan": "ca-ES-EnricNeural",
    "Czech": "cs-CZ-AntoninNeural",
    "Welsh": "cy-GB-AledNeural",
    "Danish": "da-DK-JeppeNeural",
    "German": "de-AT-JonasNeural",
    "Greek": "el-GR-NestorasNeural",
    "Irish": "ga-IE-ColmNeural",
    "Galician": "gl-ES-RoiNeural",
    "Gujarati": "gu-IN-NiranjanNeural",
    "Hebrew": "he-IL-AvriNeural",
    "Croatian": "hr-HR-SreckoNeural",
    "Hungarian": "hu-HU-TamasNeural",
    "Icelandic": "is-IS-GunnarNeural",
    "Italian": "it-IT-DiegoNeural",
    "Javanese": "jv-ID-DimasNeural",
    "Georgian": "ka-GE-GiorgiNeural",
    "Kazakh": "kk-KZ-DauletNeural",
    "Khmer": "km-KH-PisethNeural",
    "Kannada": "kn-IN-GaganNeural",
    "Lao": "lo-LA-ChanthavongNeural",
    "Lithuanian": "lt-LT-LeonasNeural",
    "Latvian": "lv-LV-NilsNeural",
    "Macedonian": "mk-MK-AleksandarNeural",
    "Malayalam": "ml-IN-MidhunNeural",
    "Mongolian": "mn-MN-BataaNeural",
    "Marathi": "mr-IN-ManoharNeural",
    "Malay": "ms-MY-OsmanNeural",
    "Maltese": "mt-MT-JosephNeural",
    "Burmese": "my-MM-ThihaNeural",
    "Norwegian Bokmål": "nb-NO-FinnNeural",
    "Nepali": "ne-NP-SagarNeural",
    "Dutch": "nl-BE-ArnaudNeural",
    "Polish": "pl-PL-MarekNeural",
    "Pashto": "ps-AF-GulNawazNeural",
    "Romanian": "ro-RO-EmilNeural",
    "Sinhala": "si-LK-SameeraNeural",
    "Slovak": "sk-SK-LukasNeural",
    "Slovenian": "sl-SI-RokNeural",
    "Somali": "so-SO-MuuseNeural",
    "Albanian": "sq-AL-IlirNeural",
    "Serbian": "sr-RS-NicholasNeural",
    "Sundanese": "su-ID-JajangNeural",
    "Swedish": "sv-SE-MattiasNeural",
    "Swahili": "sw-KE-RafikiNeural",
    "Tamil": "ta-IN-ValluvarNeural",
    "Telugu": "te-IN-MohanNeural",
    "Chinese": "zh-CN-YunjianNeural",
    "Ukrainian": "uk-UA-OstapNeural",
    "Urdu": "ur-IN-SalmanNeural",
    "Uzbek": "uz-UZ-SardorNeural",
    "Zulu": "zu-ZA-ThembaNeural",
}
|
microsoft_tts.py
ADDED
|
@@ -0,0 +1,340 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#@title <-- Just run the cell (config edge TTS)
|
| 2 |
+
edge_folder="."
|
| 3 |
+
import nltk
|
| 4 |
+
nltk.download('punkt')
|
| 5 |
+
from nltk.tokenize import sent_tokenize
|
| 6 |
+
|
| 7 |
+
from deep_translator import GoogleTranslator
|
| 8 |
+
|
| 9 |
+
from lang_data import languages,male_voice_list,female_voice_list
|
| 10 |
+
|
| 11 |
+
def translate_text(text, Language):
    """Translate ``text`` into ``Language`` using deep_translator's GoogleTranslator.

    Args:
        text: Text to translate; surrounding whitespace is stripped first.
        Language: Language display name; must be a key of ``languages``.

    Returns:
        The translator's result converted with ``str``.

    Raises:
        KeyError: If ``Language`` is not a key of ``languages``.
    """
    # Codes from ``languages`` that the Google backend does not accept as-is.
    # NOTE(review): per deep_translator's supported-language table, Google
    # lists Hebrew as "iw", Javanese as "jw" and Norwegian as "no" - confirm
    # against the pinned deep_translator version when upgrading.
    google_code_overrides = {
        "Chinese": "zh-CN",
        "Hebrew": "iw",
        "Javanese": "jw",
        "Norwegian Bokmål": "no",
    }
    # ``languages[Language]`` is evaluated eagerly so an unknown language
    # still raises KeyError, exactly as before.
    target_language = google_code_overrides.get(Language, languages[Language])
    translator = GoogleTranslator(target=target_language)
    translation = translator.translate(text.strip())
    return str(translation)
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
def chunks_sentences(paragraph, join_limit=2):
    """Split ``paragraph`` into sentences and regroup them ``join_limit`` at a time.

    Sentences come from ``sent_tokenize``; each consecutive group of up to
    ``join_limit`` sentences is joined with a single space.

    Returns:
        A list of the joined sentence groups, in original order.
    """
    sentence_list = sent_tokenize(paragraph)
    group_starts = range(0, len(sentence_list), join_limit)
    return [
        ' '.join(sentence_list[start:start + join_limit])
        for start in group_starts
    ]
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
def calculate_rate_string(input_value):
    """Convert a speed multiplier into a signed percentage string.

    A multiplier of 1 means "no change": 1.2 becomes "+20" and 0.8 becomes
    "-20". The caller appends the trailing "%".

    Args:
        input_value: Speed multiplier (int or float).

    Returns:
        The sign ("+" when ``input_value >= 1``, otherwise "-") followed by
        the absolute percentage as an integer.
    """
    # round() rather than int(): (1.2 - 1) * 100 is 19.999999999999996 in
    # binary floating point, which int() would truncate to 19.
    percent = round((input_value - 1) * 100)
    sign = '+' if input_value >= 1 else '-'
    return f"{sign}{abs(percent)}"
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
def make_chunks(input_text, language):
    """Split ``input_text`` into chunks of two sentences each.

    Args:
        input_text: Text to split.
        language: Accepted for interface compatibility but currently ignored.
            The previous implementation overwrote it with "English" before
            use, so every language already went through the same sentence
            tokenizer path.

    Returns:
        The list produced by ``chunks_sentences(input_text, join_limit=2)``.
    """
    return chunks_sentences(input_text, join_limit=2)
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
import re
|
| 56 |
+
import uuid
|
| 57 |
+
def tts_file_name(text):
    """Build an MP3 path under ``{edge_folder}/edge_tts_voice`` derived from ``text``.

    The stem is the lower-cased text with one trailing "." removed, every run
    of characters that is not a word character or "-" replaced by "_", and
    the result cut to 25 characters ("empty" when nothing is left). An
    8-character upper-case hex token from a UUID4 is appended.

    Args:
        text: Text the file name should be derived from.

    Returns:
        The generated path as a string; no file or directory is created.
    """
    cleaned = text.strip()
    if cleaned.endswith("."):
        cleaned = cleaned[:-1]
    cleaned = cleaned.lower().strip()
    # Replace whitespace, path separators and other punctuation so the text
    # cannot introduce extra path components or invalid file-name characters.
    cleaned = re.sub(r"[^\w-]+", "_", cleaned).strip("_")
    stem = cleaned[:25] or "empty"
    random_string = uuid.uuid4().hex[:8].upper()
    return f"{edge_folder}/edge_tts_voice/{stem}_{random_string}.mp3"
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
from pydub import AudioSegment
|
| 70 |
+
import shutil
|
| 71 |
+
import os
|
| 72 |
+
def merge_audio_files(audio_paths, output_path):
    """Concatenate the given audio files in order and export the result as MP3.

    Args:
        audio_paths: Iterable of audio file paths, loaded with
            ``AudioSegment.from_file``.
        output_path: Destination path for the merged MP3.
    """
    # Start from a zero-length segment and append every file in turn.
    combined = AudioSegment.silent(duration=0)
    for path in audio_paths:
        segment = AudioSegment.from_file(path)
        combined += segment

    combined.export(output_path, format="mp3")
|
| 86 |
+
|
| 87 |
+
def edge_free_tts(chunks_list,speed,voice_name,save_path,translate_text_flag,Language):
    """Synthesize text chunks with the ``edge-tts`` command-line tool.

    A single chunk is written straight to ``save_path``. Several chunks are
    written as numbered MP3 files into ``{edge_folder}/edge_tts_voice`` (the
    folder is wiped and recreated first) and then merged into ``save_path``.
    The text that was sent to the CLI is also written to ``./temp.txt``.

    Args:
        chunks_list: Non-empty list of text chunks.
        speed: Speed multiplier, converted by ``calculate_rate_string``.
        voice_name: Voice passed to ``--voice``.
        save_path: Destination MP3 path.
        translate_text_flag: When true, each chunk is passed through
            ``translate_text`` before synthesis.
        Language: Language display name forwarded to ``translate_text``.

    Returns:
        ``save_path``.

    Raises:
        ValueError: If ``chunks_list`` is empty.
    """
    # Local import so this block does not depend on a file-level import.
    import subprocess

    if not chunks_list:
        raise ValueError("chunks_list must contain at least one text chunk")

    rate_option = f"--rate={calculate_rate_string(speed)}%"

    def _synthesize(source_chunk, spoken_text, media_path):
        """Run edge-tts for one chunk; return True when it exits with status 0."""
        # An argument list (no shell) means quotes, "$" or backticks in the
        # text cannot break or inject into the command. "--text=<value>"
        # keeps text that starts with "-" from being parsed as an option.
        command = [
            "edge-tts",
            rate_option,
            "--voice", str(voice_name),
            f"--text={spoken_text}",
            "--write-media", media_path,
        ]
        try:
            succeeded = subprocess.run(command, check=False).returncode == 0
        except OSError as error:
            # e.g. the edge-tts executable is not installed or not on PATH
            print(f"Could not run edge-tts: {error}")
            succeeded = False
        if not succeeded:
            print(f"Failed: {source_chunk}")
            print(" ".join(command))
        return succeeded

    def _prepare(chunk):
        """Return the text to speak for ``chunk`` (translated when requested)."""
        return translate_text(chunk, Language) if translate_text_flag else chunk

    spoken_texts = []
    if len(chunks_list) > 1:
        chunk_dir = f"{edge_folder}/edge_tts_voice"
        if os.path.exists(chunk_dir):
            shutil.rmtree(chunk_dir)
        os.mkdir(chunk_dir)

        produced_files = []
        for index, chunk in enumerate(chunks_list, start=1):
            text = _prepare(chunk)
            spoken_texts.append(text)
            chunk_path = f"{chunk_dir}/{index}.mp3"
            # Only merge chunks that were actually produced; a failed chunk
            # has no file on disk and would make the merge step raise.
            if _synthesize(chunk, text, chunk_path):
                produced_files.append(chunk_path)
        merge_audio_files(produced_files, save_path)
    else:
        chunk = chunks_list[0]
        text = _prepare(chunk)
        spoken_texts.append(text)
        _synthesize(chunk, text, save_path)

    with open("./temp.txt", "w", encoding="utf-8") as text_file:
        text_file.write("".join(f"{text} " for text in spoken_texts))
    return save_path
|
| 133 |
+
|
| 134 |
+
|
| 135 |
+
# speed = 1 # @param {type: "number"}
|
| 136 |
+
# translate_text_flag = True # @param {type:"boolean"}
|
| 137 |
+
# long_sentence = True # @param {type:"boolean"}
|
| 138 |
+
|
| 139 |
+
|
| 140 |
+
|
| 141 |
+
|
| 142 |
+
|
| 143 |
+
|
| 144 |
+
|
| 145 |
+
|
| 146 |
+
# from IPython.display import clear_output
|
| 147 |
+
# from IPython.display import Audio
|
| 148 |
+
# Make sure the output folder for generated audio exists. exist_ok avoids the
# check-then-create race of a separate os.path.exists() test.
os.makedirs(f"{edge_folder}/audio", exist_ok=True)
|
| 150 |
+
import uuid
|
| 151 |
+
def random_audio_name_generate():
    """Return a random MP3 file name: the first 8 characters of a UUID4 plus ".mp3"."""
    short_id = str(uuid.uuid4())[:8]
    return f"{short_id}.mp3"
|
| 156 |
+
def edge_tts_pipeline(input_text,Language='English',voice_name=None,Gender='Male',translate_text_flag=True,no_silence=False,speed=1,tts_save_path="",long_sentence=True):
    """Turn ``input_text`` into a WAV file and return its path.

    Steps: choose a voice, optionally split the text into chunks, synthesize
    an MP3 with ``edge_free_tts``, convert it to WAV, optionally strip
    silence, and optionally copy the result to ``tts_save_path``.

    Args:
        input_text: Text to speak.
        Language: Language display name (a key of the voice tables).
        voice_name: Explicit voice; when None it is looked up from
            ``Gender`` and ``Language``.
        Gender: "Male" or "Female"; only used when ``voice_name`` is None.
        translate_text_flag: Forwarded to ``edge_free_tts``.
        no_silence: When true, the WAV is passed through ``remove_silence``.
        speed: Speed multiplier forwarded to ``edge_free_tts``.
        tts_save_path: When non-empty, the final WAV is also copied here.
        long_sentence: When true (or when the text is longer than 500
            characters) the text is chunked with ``make_chunks``.

    Returns:
        Path of the generated WAV under ``{edge_folder}/audio``. This is the
        generated file, not ``tts_save_path``, even when a copy is made.

    Raises:
        ValueError: If ``voice_name`` is None and ``Gender`` is neither
            "Male" nor "Female".
        KeyError: If ``Language`` is missing from the selected voice table.
    """
    # Texts longer than 500 characters are always chunked.
    if not long_sentence and len(input_text) > 500:
        long_sentence = True

    if voice_name is None:
        if Gender == "Male":
            voice_name = male_voice_list[Language]
        elif Gender == "Female":
            voice_name = female_voice_list[Language]
        else:
            # Previously voice_name stayed None and was handed to the CLI.
            raise ValueError(f"Gender must be 'Male' or 'Female', got {Gender!r}")

    if long_sentence:
        chunk_language = Language if translate_text_flag else "English"
        chunks_list = make_chunks(input_text, chunk_language)
    else:
        chunks_list = [input_text]

    temp_save_path = f"{edge_folder}/audio/" + random_audio_name_generate()
    # Swap only the extension; lower-casing the whole path (as before) would
    # corrupt a mixed-case edge_folder.
    save_path = os.path.splitext(temp_save_path)[0] + ".wav"

    edge_save_path = edge_free_tts(chunks_list, speed, voice_name, temp_save_path, translate_text_flag, Language)
    mp3_to_wav(edge_save_path, save_path)

    audio_return_path = save_path
    if no_silence:
        clean_path = f"{edge_folder}/audio/" + random_audio_name_generate().replace(".mp3", ".wav")
        remove_silence(save_path, clean_path)
        audio_return_path = clean_path

    if tts_save_path:
        shutil.copyfile(audio_return_path, tts_save_path)
    return audio_return_path
|
| 193 |
+
|
| 194 |
+
|
| 195 |
+
|
| 196 |
+
def talk(input_text):
    """Speak ``input_text`` using settings taken from module-level globals.

    Reads the globals ``Language``, ``Gender`` and ``no_silence``, which must
    be defined by the caller before this function runs (they are not assigned
    anywhere in the visible part of this file). Translation is disabled,
    speed is 1 and the text is always chunked.

    Returns:
        Path of the generated WAV file, as returned by ``edge_tts_pipeline``.
    """
    # Delegates to edge_tts_pipeline instead of duplicating its body; the
    # fixed arguments below are the values this function used to hard-code.
    return edge_tts_pipeline(
        input_text,
        Language=Language,
        voice_name=None,
        Gender=Gender,
        translate_text_flag=False,
        no_silence=no_silence,
        speed=1,
        tts_save_path="",
        long_sentence=True,
    )
|
| 227 |
+
|
| 228 |
+
from pydub import AudioSegment
|
| 229 |
+
from pydub.silence import split_on_silence
|
| 230 |
+
import os
|
| 231 |
+
|
| 232 |
+
def remove_silence(file_path,output_path):
    """Write a copy of a WAV file with its silent stretches removed.

    The audio is split with pydub's ``split_on_silence`` and the non-silent
    chunks are concatenated and exported as WAV.

    Args:
        file_path: Input audio path; read as WAV.
        output_path: Destination path for the cleaned WAV.

    Returns:
        ``output_path``.
    """
    audio_format = "wav"

    # Read the file and split it wherever silence is detected.
    sound = AudioSegment.from_file(file_path, format=audio_format)
    audio_chunks = split_on_silence(
        sound,
        min_silence_len=100,
        silence_thresh=-45,
        keep_silence=50,
    )

    # Stitch the non-silent chunks back together.
    combined = AudioSegment.empty()
    for chunk in audio_chunks:
        combined += chunk

    combined.export(output_path, format=audio_format)
    print(f"Remove silence successfully: {output_path}")

    return output_path
|
| 254 |
+
|
| 255 |
+
|
| 256 |
+
from pydub import AudioSegment
|
| 257 |
+
|
| 258 |
+
def mp3_to_wav(mp3_file, wav_file):
    """Convert the MP3 at ``mp3_file`` into a WAV file written to ``wav_file``."""
    decoded = AudioSegment.from_mp3(mp3_file)
    decoded.export(wav_file, format="wav")
|
| 266 |
+
|
| 267 |
+
|
| 268 |
+
###use case
|
| 269 |
+
|
| 270 |
+
# from microsoft_tts import edge_tts_pipeline
|
| 271 |
+
# def tts(text, Language='English',voice_name=None, tts_save_path='', Gender='Male', translate_text_flag=False, no_silence=True, speed=1.0, long_sentence=True):
|
| 272 |
+
# edge_save_path = edge_tts_pipeline(text, Language,voice_name, Gender, translate_text_flag=translate_text_flag,
|
| 273 |
+
# no_silence=no_silence, speed=speed, tts_save_path=tts_save_path,
|
| 274 |
+
# long_sentence=long_sentence)
|
| 275 |
+
# return edge_save_path
|
| 276 |
+
|
| 277 |
+
# text="Machine learning is the study of computer algorithms that improve automatically through experience. It is seen as a subset of artificial intelligence. Machine learning algorithms build a model based on sample data, known as training data, in order to make predictions or decisions without being explicitly programmed to do so. Machine learning algorithms are used in a wide variety of applications, such as email filtering and computer vision, where it is difficult or infeasible to develop conventional algorithms to perform the needed tasks."
|
| 278 |
+
# save_path = tts(text, Language='English',Gender="Male")
|
| 279 |
+
# print(save_path)
|
| 280 |
+
# import simpleaudio as sa
|
| 281 |
+
# def play_sound(filename):
|
| 282 |
+
# wave_obj = sa.WaveObject.from_wave_file(filename)
|
| 283 |
+
# play_obj = wave_obj.play()
|
| 284 |
+
# play_obj.wait_done()
|
| 285 |
+
# play_sound(save_path)
|
| 286 |
+
|
| 287 |
+
|
| 288 |
+
|
| 289 |
+
|
| 290 |
+
|
| 291 |
+
|
| 292 |
+
|
| 293 |
+
|
| 294 |
+
|
| 295 |
+
|
| 296 |
+
|
| 297 |
+
|
| 298 |
+
|
| 299 |
+
|
| 300 |
+
|
| 301 |
+
|
| 302 |
+
|
| 303 |
+
|
| 304 |
+
|
| 305 |
+
|
| 306 |
+
|
| 307 |
+
|
| 308 |
+
|
| 309 |
+
|
| 310 |
+
|
| 311 |
+
|
| 312 |
+
|
| 313 |
+
# edge_save_path=talk(text)
|
| 314 |
+
# print(f"Audio File Save at: {edge_save_path}")
|
| 315 |
+
|
| 316 |
+
# text = "a quick brown fox jumps over the lazy dog and the dog barks loudly"
|
| 317 |
+
# Language = "English" # @param ['English','Hindi','Bengali','Afrikaans', 'Amharic', 'Arabic', 'Azerbaijani', 'Bulgarian', 'Bosnian', 'Catalan', 'Czech', 'Welsh', 'Danish', 'German', 'Greek', 'Spanish', 'French', 'Irish', 'Galician', 'Gujarati', 'Hebrew', 'Croatian', 'Hungarian', 'Indonesian', 'Icelandic', 'Italian', 'Japanese', 'Javanese', 'Georgian', 'Kazakh', 'Khmer', 'Kannada', 'Korean', 'Lao', 'Lithuanian', 'Latvian', 'Macedonian', 'Malayalam', 'Mongolian', 'Marathi', 'Malay', 'Maltese', 'Burmese', 'Norwegian Bokmål', 'Nepali', 'Dutch', 'Polish', 'Pashto', 'Portuguese', 'Romanian', 'Russian', 'Sinhala', 'Slovak', 'Slovenian', 'Somali', 'Albanian', 'Serbian', 'Sundanese', 'Swedish', 'Swahili', 'Tamil', 'Telugu', 'Thai', 'Turkish', 'Ukrainian', 'Urdu', 'Uzbek', 'Vietnamese', 'Chinese', 'Zulu']
|
| 318 |
+
# no_silence = False
|
| 319 |
+
# Gender = "Male"# @param ['Male', 'Female']
|
| 320 |
+
# translate_text_flag=True
|
| 321 |
+
# no_silence=True
|
| 322 |
+
# speed=1
|
| 323 |
+
# tts_save_path='temp.wav'
|
| 324 |
+
# edge_save_path=edge_tts_pipeline(text,Language,Gender,translate_text_flag=translate_text_flag,no_silence=no_silence,speed=speed,tts_save_path=tts_save_path)
|
| 325 |
+
# print(f"Audio File Save at: {edge_save_path}")
|
| 326 |
+
|
| 327 |
+
# from microsoft_tts import edge_tts_pipeline
|
| 328 |
+
# def tts(text,tts_save_path=''):
|
| 329 |
+
# # text = "a quick brown fox jumps over the lazy dog and the dog barks loudly"
|
| 330 |
+
# Language = "English" # @param ['English','Hindi','Bengali','Afrikaans', 'Amharic', 'Arabic', 'Azerbaijani', 'Bulgarian', 'Bosnian', 'Catalan', 'Czech', 'Welsh', 'Danish', 'German', 'Greek', 'Spanish', 'French', 'Irish', 'Galician', 'Gujarati', 'Hebrew', 'Croatian', 'Hungarian', 'Indonesian', 'Icelandic', 'Italian', 'Japanese', 'Javanese', 'Georgian', 'Kazakh', 'Khmer', 'Kannada', 'Korean', 'Lao', 'Lithuanian', 'Latvian', 'Macedonian', 'Malayalam', 'Mongolian', 'Marathi', 'Malay', 'Maltese', 'Burmese', 'Norwegian Bokmål', 'Nepali', 'Dutch', 'Polish', 'Pashto', 'Portuguese', 'Romanian', 'Russian', 'Sinhala', 'Slovak', 'Slovenian', 'Somali', 'Albanian', 'Serbian', 'Sundanese', 'Swedish', 'Swahili', 'Tamil', 'Telugu', 'Thai', 'Turkish', 'Ukrainian', 'Urdu', 'Uzbek', 'Vietnamese', 'Chinese', 'Zulu']
|
| 331 |
+
# no_silence = False
|
| 332 |
+
# Gender = "Male"# @param ['Male', 'Female']
|
| 333 |
+
# translate_text_flag=True
|
| 334 |
+
# no_silence=True
|
| 335 |
+
# speed=1
|
| 336 |
+
# # tts_save_path='temp.wav'
|
| 337 |
+
# long_sentence=True
|
| 338 |
+
# edge_save_path=edge_tts_pipeline(text,Language,Gender,translate_text_flag=translate_text_flag,no_silence=no_silence,speed=speed,tts_save_path=tts_save_path,long_sentence=long_sentence)
|
| 339 |
+
# print(f"Audio File Save at: {edge_save_path}")
|
| 340 |
+
# return edge_save_path
|
requirements.txt
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
edge-tts
|
| 2 |
+
deep_translator==1.11.4
|
| 3 |
+
nltk==3.8.1
|
| 4 |
+
pydub==0.25.1
|
| 5 |
+
gradio>=5.6.0
|
| 6 |
+
click==8.1.7
|