Update app.py
Browse files
app.py
CHANGED
|
@@ -1,497 +1,310 @@
|
|
| 1 |
-
import tempfile
|
| 2 |
-
|
| 3 |
-
import edge_tts
|
| 4 |
import gradio as gr
|
| 5 |
-
|
| 6 |
-
import
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
"Guy": "en-US-GuyNeural",
|
| 12 |
-
"Ana": "en-US-AnaNeural",
|
| 13 |
-
"Aria": "en-US-AriaNeural",
|
| 14 |
-
"Christopher": "en-US-ChristopherNeural",
|
| 15 |
-
"Eric": "en-US-EricNeural",
|
| 16 |
-
"Michelle": "en-US-MichelleNeural",
|
| 17 |
-
"Roger": "en-US-RogerNeural",
|
| 18 |
-
"Natasha": "en-AU-NatashaNeural",
|
| 19 |
-
"William": "en-AU-WilliamNeural",
|
| 20 |
-
"Clara": "en-CA-ClaraNeural",
|
| 21 |
-
"Liam": "en-CA-LiamNeural",
|
| 22 |
-
"Libby": "en-GB-LibbyNeural",
|
| 23 |
-
"Maisie": "en-GB-MaisieNeural",
|
| 24 |
-
"Ryan": "en-GB-RyanNeural",
|
| 25 |
-
"Sonia": "en-GB-SoniaNeural",
|
| 26 |
-
"Thomas": "en-GB-ThomasNeural",
|
| 27 |
-
"Sam": "en-HK-SamNeural",
|
| 28 |
-
"Yan": "en-HK-YanNeural",
|
| 29 |
-
"Connor": "en-IE-ConnorNeural",
|
| 30 |
-
"Emily": "en-IE-EmilyNeural",
|
| 31 |
-
"Neerja": "en-IN-NeerjaNeural",
|
| 32 |
-
"Prabhat": "en-IN-PrabhatNeural",
|
| 33 |
-
"Asilia": "en-KE-AsiliaNeural",
|
| 34 |
-
"Chilemba": "en-KE-ChilembaNeural",
|
| 35 |
-
"Abeo": "en-NG-AbeoNeural",
|
| 36 |
-
"Ezinne": "en-NG-EzinneNeural",
|
| 37 |
-
"Mitchell": "en-NZ-MitchellNeural",
|
| 38 |
-
"James": "en-PH-JamesNeural",
|
| 39 |
-
"Rosa": "en-PH-RosaNeural",
|
| 40 |
-
"Luna": "en-SG-LunaNeural",
|
| 41 |
-
"Wayne": "en-SG-WayneNeural",
|
| 42 |
-
"Elimu": "en-TZ-ElimuNeural",
|
| 43 |
-
"Imani": "en-TZ-ImaniNeural",
|
| 44 |
-
"Leah": "en-ZA-LeahNeural",
|
| 45 |
-
"Luke": "en-ZA-LukeNeural"
|
| 46 |
-
},
|
| 47 |
-
"Spanish": {
|
| 48 |
-
"Elena": "es-AR-ElenaNeural",
|
| 49 |
-
"Tomas": "es-AR-TomasNeural",
|
| 50 |
-
"Marcelo": "es-BO-MarceloNeural",
|
| 51 |
-
"Sofia": "es-BO-SofiaNeural",
|
| 52 |
-
"Gonzalo": "es-CO-GonzaloNeural",
|
| 53 |
-
"Salome": "es-CO-SalomeNeural",
|
| 54 |
-
"Juan": "es-CR-JuanNeural",
|
| 55 |
-
"Maria": "es-CR-MariaNeural",
|
| 56 |
-
"Belkys": "es-CU-BelkysNeural",
|
| 57 |
-
"Emilio": "es-DO-EmilioNeural",
|
| 58 |
-
"Ramona": "es-DO-RamonaNeural",
|
| 59 |
-
"Andrea": "es-EC-AndreaNeural",
|
| 60 |
-
"Luis": "es-EC-LuisNeural",
|
| 61 |
-
"Alvaro": "es-ES-AlvaroNeural",
|
| 62 |
-
"Elvira": "es-ES-ElviraNeural",
|
| 63 |
-
"Teresa": "es-GQ-TeresaNeural",
|
| 64 |
-
"Andres": "es-GT-AndresNeural",
|
| 65 |
-
"Marta": "es-GT-MartaNeural",
|
| 66 |
-
"Carlos": "es-HN-CarlosNeural",
|
| 67 |
-
"Karla": "es-HN-KarlaNeural",
|
| 68 |
-
"Federico": "es-NI-FedericoNeural",
|
| 69 |
-
"Yolanda": "es-NI-YolandaNeural",
|
| 70 |
-
"Margarita": "es-PA-MargaritaNeural",
|
| 71 |
-
"Roberto": "es-PA-RobertoNeural",
|
| 72 |
-
"Alex": "es-PE-AlexNeural",
|
| 73 |
-
"Camila": "es-PE-CamilaNeural",
|
| 74 |
-
"Karina": "es-PR-KarinaNeural",
|
| 75 |
-
"Victor": "es-PR-VictorNeural",
|
| 76 |
-
"Mario": "es-PY-MarioNeural",
|
| 77 |
-
"Tania": "es-PY-TaniaNeural",
|
| 78 |
-
"Lorena": "es-SV-LorenaNeural",
|
| 79 |
-
"Rodrigo": "es-SV-RodrigoNeural",
|
| 80 |
-
"Alonso": "es-US-AlonsoNeural",
|
| 81 |
-
"Paloma": "es-US-PalomaNeural",
|
| 82 |
-
"Mateo": "es-UY-MateoNeural",
|
| 83 |
-
"Valentina": "es-UY-ValentinaNeural",
|
| 84 |
-
"Paola": "es-VE-PaolaNeural",
|
| 85 |
-
"Sebastian": "es-VE-SebastianNeural"
|
| 86 |
-
},
|
| 87 |
-
"Arabic": {
|
| 88 |
-
"Hamed": "ar-SA-HamedNeural",
|
| 89 |
-
"Zariyah": "ar-SA-ZariyahNeural",
|
| 90 |
-
"Fatima": "ar-AE-FatimaNeural",
|
| 91 |
-
"Hamdan": "ar-AE-HamdanNeural",
|
| 92 |
-
"Ali": "ar-BH-AliNeural",
|
| 93 |
-
"Laila": "ar-BH-LailaNeural",
|
| 94 |
-
"Ismael": "ar-DZ-IsmaelNeural",
|
| 95 |
-
"Salma": "ar-EG-SalmaNeural",
|
| 96 |
-
"Shakir": "ar-EG-ShakirNeural",
|
| 97 |
-
"Bassel": "ar-IQ-BasselNeural",
|
| 98 |
-
"Rana": "ar-IQ-RanaNeural",
|
| 99 |
-
"Sana": "ar-JO-SanaNeural",
|
| 100 |
-
"Taim": "ar-JO-TaimNeural",
|
| 101 |
-
"Fahed": "ar-KW-FahedNeural",
|
| 102 |
-
"Noura": "ar-KW-NouraNeural",
|
| 103 |
-
"Layla": "ar-LB-LaylaNeural",
|
| 104 |
-
"Rami": "ar-LB-RamiNeural",
|
| 105 |
-
"Iman": "ar-LY-ImanNeural",
|
| 106 |
-
"Omar": "ar-LY-OmarNeural",
|
| 107 |
-
"Jamal": "ar-MA-JamalNeural",
|
| 108 |
-
"Mouna": "ar-MA-MounaNeural",
|
| 109 |
-
"Abdullah": "ar-OM-AbdullahNeural",
|
| 110 |
-
"Aysha": "ar-OM-AyshaNeural",
|
| 111 |
-
"Amal": "ar-QA-AmalNeural",
|
| 112 |
-
"Moaz": "ar-QA-MoazNeural",
|
| 113 |
-
"Amany": "ar-SY-AmanyNeural",
|
| 114 |
-
"Laith": "ar-SY-LaithNeural",
|
| 115 |
-
"Hedi": "ar-TN-HediNeural",
|
| 116 |
-
"Reem": "ar-TN-ReemNeural",
|
| 117 |
-
"Maryam": "ar-YE-MaryamNeural",
|
| 118 |
-
"Saleh": "ar-YE-SalehNeural"
|
| 119 |
-
},
|
| 120 |
-
"Korean": {
|
| 121 |
-
"Sun-Hi": "ko-KR-SunHiNeural",
|
| 122 |
-
"InJoon": "ko-KR-InJoonNeural"
|
| 123 |
-
},
|
| 124 |
-
"Thai": {
|
| 125 |
-
"Premwadee": "th-TH-PremwadeeNeural",
|
| 126 |
-
"Niwat": "th-TH-NiwatNeural"
|
| 127 |
-
},
|
| 128 |
-
"Vietnamese": {
|
| 129 |
-
"HoaiMy": "vi-VN-HoaiMyNeural",
|
| 130 |
-
"NamMinh": "vi-VN-NamMinhNeural"
|
| 131 |
-
},
|
| 132 |
-
"Japanese": {
|
| 133 |
-
"Nanami": "ja-JP-NanamiNeural",
|
| 134 |
-
"Keita": "ja-JP-KeitaNeural"
|
| 135 |
-
},
|
| 136 |
-
"French": {
|
| 137 |
-
"Denise": "fr-FR-DeniseNeural",
|
| 138 |
-
"Eloise": "fr-FR-EloiseNeural",
|
| 139 |
-
"Henri": "fr-FR-HenriNeural",
|
| 140 |
-
"Sylvie": "fr-CA-SylvieNeural",
|
| 141 |
-
"Antoine": "fr-CA-AntoineNeural",
|
| 142 |
-
"Jean": "fr-CA-JeanNeural",
|
| 143 |
-
"Ariane": "fr-CH-ArianeNeural",
|
| 144 |
-
"Fabrice": "fr-CH-FabriceNeural",
|
| 145 |
-
"Charline": "fr-BE-CharlineNeural",
|
| 146 |
-
"Gerard": "fr-BE-GerardNeural"
|
| 147 |
-
},
|
| 148 |
-
"Portuguese": {
|
| 149 |
-
"Francisca": "pt-BR-FranciscaNeural",
|
| 150 |
-
"Antonio": "pt-BR-AntonioNeural",
|
| 151 |
-
"Duarte": "pt-PT-DuarteNeural",
|
| 152 |
-
"Raquel": "pt-PT-RaquelNeural"
|
| 153 |
-
},
|
| 154 |
-
"Indonesian": {
|
| 155 |
-
"Ardi": "id-ID-ArdiNeural",
|
| 156 |
-
"Gadis": "id-ID-GadisNeural"
|
| 157 |
-
},
|
| 158 |
-
"Hebrew": {
|
| 159 |
-
"Avri": "he-IL-AvriNeural",
|
| 160 |
-
"Hila": "he-IL-HilaNeural"
|
| 161 |
-
},
|
| 162 |
-
"Italian": {
|
| 163 |
-
"Isabella": "it-IT-IsabellaNeural",
|
| 164 |
-
"Diego": "it-IT-DiegoNeural",
|
| 165 |
-
"Elsa": "it-IT-ElsaNeural"
|
| 166 |
-
},
|
| 167 |
-
"Dutch": {
|
| 168 |
-
"Colette": "nl-NL-ColetteNeural",
|
| 169 |
-
"Fenna": "nl-NL-FennaNeural",
|
| 170 |
-
"Maarten": "nl-NL-MaartenNeural",
|
| 171 |
-
"Arnaud": "nl-BE-ArnaudNeural",
|
| 172 |
-
"Dena": "nl-BE-DenaNeural"
|
| 173 |
-
},
|
| 174 |
-
"Malay": {
|
| 175 |
-
"Osman": "ms-MY-OsmanNeural",
|
| 176 |
-
"Yasmin": "ms-MY-YasminNeural"
|
| 177 |
-
},
|
| 178 |
-
"Norwegian": {
|
| 179 |
-
"Pernille": "nb-NO-PernilleNeural",
|
| 180 |
-
"Finn": "nb-NO-FinnNeural"
|
| 181 |
-
},
|
| 182 |
-
"Swedish": {
|
| 183 |
-
"Sofie": "sv-SE-SofieNeural",
|
| 184 |
-
"Mattias": "sv-SE-MattiasNeural"
|
| 185 |
-
},
|
| 186 |
-
"Greek": {
|
| 187 |
-
"Athina": "el-GR-AthinaNeural",
|
| 188 |
-
"Nestoras": "el-GR-NestorasNeural"
|
| 189 |
-
},
|
| 190 |
-
"German": {
|
| 191 |
-
"Katja": "de-DE-KatjaNeural",
|
| 192 |
-
"Amala": "de-DE-AmalaNeural",
|
| 193 |
-
"Conrad": "de-DE-ConradNeural",
|
| 194 |
-
"Killian": "de-DE-KillianNeural",
|
| 195 |
-
"Ingrid": "de-AT-IngridNeural",
|
| 196 |
-
"Jonas": "de-AT-JonasNeural",
|
| 197 |
-
"Jan": "de-CH-JanNeural",
|
| 198 |
-
"Leni": "de-CH-LeniNeural"
|
| 199 |
-
},
|
| 200 |
-
"Afrikaans": {
|
| 201 |
-
"Adri": "af-ZA-AdriNeural",
|
| 202 |
-
"Willem": "af-ZA-WillemNeural"
|
| 203 |
-
},
|
| 204 |
-
"Amharic": {
|
| 205 |
-
"Ameha": "am-ET-AmehaNeural",
|
| 206 |
-
"Mekdes": "am-ET-MekdesNeural"
|
| 207 |
-
},
|
| 208 |
-
"Azerbaijani": {
|
| 209 |
-
"Babek": "az-AZ-BabekNeural",
|
| 210 |
-
"Banu": "az-AZ-BanuNeural"
|
| 211 |
-
},
|
| 212 |
-
"Bulgarian": {
|
| 213 |
-
"Borislav": "bg-BG-BorislavNeural",
|
| 214 |
-
"Kalina": "bg-BG-KalinaNeural"
|
| 215 |
-
},
|
| 216 |
-
"Bengali": {
|
| 217 |
-
"Nabanita": "bn-BD-NabanitaNeural",
|
| 218 |
-
"Pradeep": "bn-BD-PradeepNeural",
|
| 219 |
-
"Bashkar": "bn-IN-BashkarNeural",
|
| 220 |
-
"Tanishaa": "bn-IN-TanishaaNeural"
|
| 221 |
-
},
|
| 222 |
-
"Bosnian": {
|
| 223 |
-
"Goran": "bs-BA-GoranNeural",
|
| 224 |
-
"Vesna": "bs-BA-VesnaNeural"
|
| 225 |
-
},
|
| 226 |
-
"Catalan": {
|
| 227 |
-
"Joana": "ca-ES-JoanaNeural",
|
| 228 |
-
"Enric": "ca-ES-EnricNeural"
|
| 229 |
-
},
|
| 230 |
-
"Czech": {
|
| 231 |
-
"Antonin": "cs-CZ-AntoninNeural",
|
| 232 |
-
"Vlasta": "cs-CZ-VlastaNeural"
|
| 233 |
-
},
|
| 234 |
-
"Welsh": {
|
| 235 |
-
"Aled": "cy-GB-AledNeural",
|
| 236 |
-
"Nia": "cy-GB-NiaNeural"
|
| 237 |
-
},
|
| 238 |
-
"Danish": {
|
| 239 |
-
"Christel": "da-DK-ChristelNeural",
|
| 240 |
-
"Jeppe": "da-DK-JeppeNeural"
|
| 241 |
-
},
|
| 242 |
-
"Estonian": {
|
| 243 |
-
"Anu": "et-EE-AnuNeural",
|
| 244 |
-
"Kert": "et-EE-KertNeural"
|
| 245 |
-
},
|
| 246 |
-
"Persian": {
|
| 247 |
-
"Dilara": "fa-IR-DilaraNeural",
|
| 248 |
-
"Farid": "fa-IR-FaridNeural"
|
| 249 |
-
},
|
| 250 |
-
"Finnish": {
|
| 251 |
-
"Harri": "fi-FI-HarriNeural",
|
| 252 |
-
"Noora": "fi-FI-NooraNeural"
|
| 253 |
-
},
|
| 254 |
-
"Irish": {
|
| 255 |
-
"Colm": "ga-IE-ColmNeural",
|
| 256 |
-
"Orla": "ga-IE-OrlaNeural"
|
| 257 |
-
},
|
| 258 |
-
"Galician": {
|
| 259 |
-
"Roi": "gl-ES-RoiNeural",
|
| 260 |
-
"Sabela": "gl-ES-SabelaNeural"
|
| 261 |
-
},
|
| 262 |
-
"Gujarati": {
|
| 263 |
-
"Dhwani": "gu-IN-DhwaniNeural",
|
| 264 |
-
"Niranjan": "gu-IN-NiranjanNeural"
|
| 265 |
-
},
|
| 266 |
-
"Hindi": {
|
| 267 |
-
"Madhur": "hi-IN-MadhurNeural",
|
| 268 |
-
"Swara": "hi-IN-SwaraNeural"
|
| 269 |
-
},
|
| 270 |
-
"Croatian": {
|
| 271 |
-
"Gabrijela": "hr-HR-GabrijelaNeural",
|
| 272 |
-
"Srecko": "hr-HR-SreckoNeural"
|
| 273 |
-
},
|
| 274 |
-
"Hungarian": {
|
| 275 |
-
"Noemi": "hu-HU-NoemiNeural",
|
| 276 |
-
"Tamas": "hu-HU-TamasNeural"
|
| 277 |
-
},
|
| 278 |
-
"Icelandic": {
|
| 279 |
-
"Gudrun": "is-IS-GudrunNeural",
|
| 280 |
-
"Gunnar": "is-IS-GunnarNeural"
|
| 281 |
-
},
|
| 282 |
-
"Javanese": {
|
| 283 |
-
"Dimas": "jv-ID-DimasNeural",
|
| 284 |
-
"Siti": "jv-ID-SitiNeural"
|
| 285 |
-
},
|
| 286 |
-
"Georgian": {
|
| 287 |
-
"Eka": "ka-GE-EkaNeural",
|
| 288 |
-
"Giorgi": "ka-GE-GiorgiNeural"
|
| 289 |
-
},
|
| 290 |
-
"Kazakh": {
|
| 291 |
-
"Aigul": "kk-KZ-AigulNeural",
|
| 292 |
-
"Daulet": "kk-KZ-DauletNeural"
|
| 293 |
-
},
|
| 294 |
-
"Khmer": {
|
| 295 |
-
"Piseth": "km-KH-PisethNeural",
|
| 296 |
-
"Sreymom": "km-KH-SreymomNeural"
|
| 297 |
-
},
|
| 298 |
-
"Kannada": {
|
| 299 |
-
"Gagan": "kn-IN-GaganNeural",
|
| 300 |
-
"Sapna": "kn-IN-SapnaNeural"
|
| 301 |
-
},
|
| 302 |
-
"Lao": {
|
| 303 |
-
"Chanthavong": "lo-LA-ChanthavongNeural",
|
| 304 |
-
"Keomany": "lo-LA-KeomanyNeural"
|
| 305 |
-
},
|
| 306 |
-
"Lithuanian": {
|
| 307 |
-
"Leonas": "lt-LT-LeonasNeural",
|
| 308 |
-
"Ona": "lt-LT-OnaNeural"
|
| 309 |
-
},
|
| 310 |
-
"Latvian": {
|
| 311 |
-
"Everita": "lv-LV-EveritaNeural",
|
| 312 |
-
"Nils": "lv-LV-NilsNeural"
|
| 313 |
-
},
|
| 314 |
-
"Macedonian": {
|
| 315 |
-
"Aleksandar": "mk-MK-AleksandarNeural",
|
| 316 |
-
"Marija": "mk-MK-MarijaNeural"
|
| 317 |
-
},
|
| 318 |
-
"Malayalam": {
|
| 319 |
-
"Midhun": "ml-IN-MidhunNeural",
|
| 320 |
-
"Sobhana": "ml-IN-SobhanaNeural"
|
| 321 |
-
},
|
| 322 |
-
"Mongolian": {
|
| 323 |
-
"Bataa": "mn-MN-BataaNeural",
|
| 324 |
-
"Yesui": "mn-MN-YesuiNeural"
|
| 325 |
-
},
|
| 326 |
-
"Marathi": {
|
| 327 |
-
"Aarohi": "mr-IN-AarohiNeural",
|
| 328 |
-
"Manohar": "mr-IN-ManoharNeural"
|
| 329 |
-
},
|
| 330 |
-
"Maltese": {
|
| 331 |
-
"Grace": "mt-MT-GraceNeural",
|
| 332 |
-
"Joseph": "mt-MT-JosephNeural"
|
| 333 |
-
},
|
| 334 |
-
"Burmese": {
|
| 335 |
-
"Nilar": "my-MM-NilarNeural",
|
| 336 |
-
"Thiha": "my-MM-ThihaNeural"
|
| 337 |
-
},
|
| 338 |
-
"Nepali": {
|
| 339 |
-
"Hemkala": "ne-NP-HemkalaNeural",
|
| 340 |
-
"Sagar": "ne-NP-SagarNeural"
|
| 341 |
-
},
|
| 342 |
-
"Polish": {
|
| 343 |
-
"Marek": "pl-PL-MarekNeural",
|
| 344 |
-
"Zofia": "pl-PL-ZofiaNeural"
|
| 345 |
-
},
|
| 346 |
-
"Pashto": {
|
| 347 |
-
"Gul Nawaz": "ps-AF-GulNawazNeural",
|
| 348 |
-
"Latifa": "ps-AF-LatifaNeural"
|
| 349 |
-
},
|
| 350 |
-
"Romanian": {
|
| 351 |
-
"Alina": "ro-RO-AlinaNeural",
|
| 352 |
-
"Emil": "ro-RO-EmilNeural"
|
| 353 |
-
},
|
| 354 |
-
"Russian": {
|
| 355 |
-
"Svetlana": "ru-RU-SvetlanaNeural",
|
| 356 |
-
"Dmitry": "ru-RU-DmitryNeural"
|
| 357 |
-
},
|
| 358 |
-
"Sinhala": {
|
| 359 |
-
"Sameera": "si-LK-SameeraNeural",
|
| 360 |
-
"Thilini": "si-LK-ThiliniNeural"
|
| 361 |
-
},
|
| 362 |
-
"Slovak": {
|
| 363 |
-
"Lukas": "sk-SK-LukasNeural",
|
| 364 |
-
"Viktoria": "sk-SK-ViktoriaNeural"
|
| 365 |
-
},
|
| 366 |
-
"Slovenian": {
|
| 367 |
-
"Petra": "sl-SI-PetraNeural",
|
| 368 |
-
"Rok": "sl-SI-RokNeural"
|
| 369 |
-
},
|
| 370 |
-
"Somali": {
|
| 371 |
-
"Muuse": "so-SO-MuuseNeural",
|
| 372 |
-
"Ubax": "so-SO-UbaxNeural"
|
| 373 |
-
},
|
| 374 |
-
"Albanian": {
|
| 375 |
-
"Anila": "sq-AL-AnilaNeural",
|
| 376 |
-
"Ilir": "sq-AL-IlirNeural"
|
| 377 |
-
},
|
| 378 |
-
"Serbian": {
|
| 379 |
-
"Nicholas": "sr-RS-NicholasNeural",
|
| 380 |
-
"Sophie": "sr-RS-SophieNeural"
|
| 381 |
-
},
|
| 382 |
-
"Sundanese": {
|
| 383 |
-
"Jajang": "su-ID-JajangNeural",
|
| 384 |
-
"Tuti": "su-ID-TutiNeural"
|
| 385 |
-
},
|
| 386 |
-
"Swahili": {
|
| 387 |
-
"Rafiki": "sw-KE-RafikiNeural",
|
| 388 |
-
"Zuri": "sw-KE-ZuriNeural",
|
| 389 |
-
"Daudi": "sw-TZ-DaudiNeural",
|
| 390 |
-
"Rehema": "sw-TZ-RehemaNeural"
|
| 391 |
-
},
|
| 392 |
-
"Tamil": {
|
| 393 |
-
"Pallavi": "ta-IN-PallaviNeural",
|
| 394 |
-
"Valluvar": "ta-IN-ValluvarNeural",
|
| 395 |
-
"Kumar": "ta-LK-KumarNeural",
|
| 396 |
-
"Saranya": "ta-LK-SaranyaNeural",
|
| 397 |
-
"Kani": "ta-MY-KaniNeural",
|
| 398 |
-
"Surya": "ta-MY-SuryaNeural",
|
| 399 |
-
"Anbu": "ta-SG-AnbuNeural"
|
| 400 |
-
},
|
| 401 |
-
"Telugu": {
|
| 402 |
-
"Mohan": "te-IN-MohanNeural",
|
| 403 |
-
"Shruti": "te-IN-ShrutiNeural"
|
| 404 |
-
},
|
| 405 |
-
"Turkish": {
|
| 406 |
-
"Ahmet": "tr-TR-AhmetNeural",
|
| 407 |
-
"Emel": "tr-TR-EmelNeural"
|
| 408 |
-
},
|
| 409 |
-
"Ukrainian": {
|
| 410 |
-
"Ostap": "uk-UA-OstapNeural",
|
| 411 |
-
"Polina": "uk-UA-PolinaNeural"
|
| 412 |
-
},
|
| 413 |
-
"Urdu": {
|
| 414 |
-
"Gul": "ur-IN-GulNeural",
|
| 415 |
-
"Salman": "ur-IN-SalmanNeural",
|
| 416 |
-
"Asad": "ur-PK-AsadNeural",
|
| 417 |
-
"Uzma": "ur-PK-UzmaNeural"
|
| 418 |
-
},
|
| 419 |
-
"Uzbek": {
|
| 420 |
-
"Madina": "uz-UZ-MadinaNeural",
|
| 421 |
-
"Sardor": "uz-UZ-SardorNeural"
|
| 422 |
-
},
|
| 423 |
-
"Mandarin": {
|
| 424 |
-
"Xiaoxiao": "zh-CN-XiaoxiaoNeural",
|
| 425 |
-
"Yunyang": "zh-CN-YunyangNeural",
|
| 426 |
-
"Yunxi": "zh-CN-YunxiNeural",
|
| 427 |
-
"Xiaoyi": "zh-CN-XiaoyiNeural",
|
| 428 |
-
"Yunjian": "zh-CN-YunjianNeural",
|
| 429 |
-
"Yunxia": "zh-CN-YunxiaNeural",
|
| 430 |
-
"Xiaobei": "zh-CN-liaoning-XiaobeiNeural",
|
| 431 |
-
"Xiaoni": "zh-CN-shaanxi-XiaoniNeural",
|
| 432 |
-
"HiuMaan": "zh-HK-HiuMaanNeural",
|
| 433 |
-
"HiuGaai": "zh-HK-HiuGaaiNeural",
|
| 434 |
-
"WanLung": "zh-HK-WanLungNeural",
|
| 435 |
-
"HsiaoChen": "zh-TW-HsiaoChenNeural",
|
| 436 |
-
"HsiaoYu": "zh-TW-HsiaoYuNeural",
|
| 437 |
-
"YunJhe": "zh-TW-YunJheNeural"
|
| 438 |
-
},
|
| 439 |
-
"Zulu": {
|
| 440 |
-
"Thando": "zu-ZA-ThandoNeural",
|
| 441 |
-
"Themba": "zu-ZA-ThembaNeural"
|
| 442 |
-
}
|
| 443 |
-
}
|
| 444 |
-
|
| 445 |
-
client = Client("MohamedRashad/arabic-auto-tashkeel")
|
| 446 |
-
|
| 447 |
-
async def text_to_speech_edge(text, language_code, speaker, tashkeel_checkbox=False):
|
| 448 |
|
| 449 |
-
|
| 450 |
-
|
| 451 |
-
|
| 452 |
-
input_text=araby.strip_diacritics(text),
|
| 453 |
-
api_name="/infer_shakkala"
|
| 454 |
-
)
|
| 455 |
|
| 456 |
-
|
| 457 |
-
|
| 458 |
-
|
| 459 |
-
|
| 460 |
-
|
| 461 |
-
|
| 462 |
-
|
| 463 |
-
|
| 464 |
-
|
| 465 |
-
|
| 466 |
-
|
| 467 |
-
|
| 468 |
-
|
| 469 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 470 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 471 |
|
| 472 |
-
|
| 473 |
-
|
| 474 |
-
|
| 475 |
-
|
| 476 |
-
|
| 477 |
-
|
| 478 |
-
|
|
|
|
|
|
|
| 479 |
with gr.Row():
|
| 480 |
-
with gr.Column():
|
| 481 |
-
|
| 482 |
-
|
| 483 |
-
|
|
|
|
|
|
|
| 484 |
)
|
| 485 |
-
|
| 486 |
-
|
| 487 |
-
|
| 488 |
-
|
| 489 |
-
|
| 490 |
-
|
| 491 |
-
|
| 492 |
-
|
| 493 |
-
|
| 494 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 495 |
|
| 496 |
if __name__ == "__main__":
|
| 497 |
-
demo.
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
+
import numpy as np
|
| 3 |
+
import tempfile
|
| 4 |
+
import librosa
|
| 5 |
+
import soundfile as sf
|
| 6 |
+
from scipy import signal
|
| 7 |
+
import os
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
|
| 9 |
+
class AIHumanizer:
    """Chain of subtle audio effects applied to a song, channel by channel.

    The class keeps no state. Every effect method takes the samples ``y``,
    the sample rate ``sr`` and an ``intensity`` factor that scales how
    strongly the effect is blended into the signal, and returns an array of
    the same length. Effects are best-effort: when one fails, the reason is
    printed and that effect's input is returned unchanged.
    """

    def humanize_audio(self, audio_path, intensity=0.7):
        """Load an audio file and run every channel through the effect chain.

        Args:
            audio_path: Path of the audio file handed to ``librosa.load``.
            intensity: Strength factor forwarded to every effect.

        Returns:
            A tuple ``(audio, sr)``: ``audio`` is a 2-D array with one row per
            channel (a single row for mono input) and ``sr`` is the sample
            rate reported by ``librosa.load``.

        Raises:
            Exception: "Humanization failed: ..." wrapping any error raised
                while loading or processing; the original error is chained.
        """
        try:
            print(f"Loading audio from: {audio_path}")

            # sr=None / mono=False: keep the file's own rate and channels.
            y, sr = librosa.load(audio_path, sr=None, mono=False)

            print(f"Audio loaded: shape={y.shape if hasattr(y, 'shape') else 'mono'}, sr={sr}")

            if len(y.shape) > 1:
                # Multi-channel: each row is processed independently.
                print("Processing stereo audio...")
                processed_channels = []
                for i, channel in enumerate(y):
                    print(f"Processing channel {i+1}...")
                    processed_channels.append(
                        self.process_channel(channel, sr, intensity)
                    )
                y_processed = np.array(processed_channels)
            else:
                print("Processing mono audio...")
                # Wrap in a list so callers always get (channels, samples).
                y_processed = np.array([self.process_channel(y, sr, intensity)])

            print("Audio processing completed successfully")
            return y_processed, sr

        except Exception as e:
            print(f"Error in humanize_audio: {str(e)}")
            # Chain the cause so the original traceback is not lost.
            raise Exception(f"Humanization failed: {str(e)}") from e

    def process_channel(self, y, sr, intensity):
        """Run one channel through all six effects, in a fixed order.

        Args:
            y: 1-D array of samples for a single channel.
            sr: Sample rate in Hz.
            intensity: Strength factor forwarded to every effect.

        Returns:
            The processed samples (same length as ``y``).
        """
        print(f"Processing channel: {len(y)} samples")

        # 1. Reduce robotic frequencies
        y = self.reduce_ai_artifacts(y, sr, intensity)

        # 2. Add timing variations
        y = self.add_timing_variations(y, sr, intensity)

        # 3. Add pitch variations
        y = self.add_pitch_variations(y, sr, intensity)

        # 4. Add room ambiance
        y = self.add_room_ambiance(y, sr, intensity)

        # 5. Add analog warmth
        y = self.add_analog_warmth(y, sr, intensity)

        # 6. Add slow amplitude variations
        y = self.reduce_perfect_quantization(y, sr, intensity)

        return y

    def reduce_ai_artifacts(self, y, sr, intensity):
        """Blend in a copy of the signal with the 1.9-6.1 kHz band attenuated.

        Returns ``y`` itself when ``intensity`` is at most 0.1 or when the
        sample rate is too low for the stop band to fit below Nyquist.
        """
        stop_band = [1900, 6100]
        # butter() rejects band edges at or above Nyquist (sr / 2), so skip
        # the filter explicitly for low sample rates instead of depending on
        # the exception handler below.
        if intensity <= 0.1 or stop_band[1] >= sr / 2:
            return y

        try:
            sos = signal.butter(4, stop_band, 'bandstop', fs=sr, output='sos')
            y_filtered = signal.sosfilt(sos, y)

            # Blend with original
            blend_factor = 0.3 * intensity
            return y * (1 - blend_factor) + y_filtered * blend_factor
        except Exception as e:
            print(f"reduce_ai_artifacts skipped: {e}")
            return y

    def add_timing_variations(self, y, sr, intensity):
        """Randomly stretch or compress each full one-second segment slightly.

        Every segment is resampled by a random factor within +/-1 % (scaled
        by ``intensity``) and then trimmed or zero-padded back to one second,
        so the overall length is unchanged. Inputs shorter than two seconds
        and intensities below 0.2 are returned untouched.
        """
        if intensity < 0.2:
            return y

        try:
            segment_size = int(sr * 1.0)  # 1-second segments
            if len(y) < segment_size * 2:
                return y

            x_old = np.linspace(0, 1, segment_size)  # same for every segment
            segments = []
            for start in range(0, len(y), segment_size):
                segment = y[start:start + segment_size]
                if len(segment) != segment_size:
                    # Trailing partial segment is kept as-is.
                    segments.append(segment)
                    continue

                # Small random stretch
                stretch = 1.0 + np.random.uniform(-0.01, 0.01) * intensity
                new_len = int(segment_size * stretch)

                # Resample by linear interpolation
                x_new = np.linspace(0, 1, new_len)
                segment_stretched = np.interp(x_new, x_old, segment)

                # Trim or pad to original length
                # NOTE(review): zero-padding a compressed segment leaves a
                # short silent gap at the segment end - confirm intended.
                if len(segment_stretched) > segment_size:
                    segment_stretched = segment_stretched[:segment_size]
                else:
                    segment_stretched = np.pad(
                        segment_stretched,
                        (0, segment_size - len(segment_stretched)),
                    )

                segments.append(segment_stretched)

            return np.concatenate(segments)
        except Exception as e:
            print(f"add_timing_variations skipped: {e}")
            return y

    def add_pitch_variations(self, y, sr, intensity):
        """Blend in a copy shifted by a random fraction of a semitone.

        The shift is drawn from +/-0.2 steps scaled by ``intensity``; the
        shifted copy contributes ``0.2 * intensity`` of the output.
        Intensities below 0.3 return ``y`` unchanged.
        """
        if intensity < 0.3:
            return y

        try:
            # Small random pitch shift
            n_steps = np.random.uniform(-0.2, 0.2) * intensity
            y_shifted = librosa.effects.pitch_shift(y, sr=sr, n_steps=n_steps)

            # Blend
            blend_factor = 0.2 * intensity
            return y * (1 - blend_factor) + y_shifted * blend_factor
        except Exception as e:
            print(f"add_pitch_variations skipped: {e}")
            return y

    def add_room_ambiance(self, y, sr, intensity):
        """Blend in a short synthetic reverb (one reflection plus a decay).

        The impulse response is 150 ms long: a 0.8 spike at 10 ms followed by
        an exponentially decaying tail. The reverberated copy is rescaled to
        the input's peak and contributes ``0.1 * intensity`` of the output.
        Intensities below 0.2 return ``y`` unchanged.
        """
        if intensity < 0.2:
            return y

        try:
            # Simple reverb impulse
            impulse_len = int(0.15 * sr)
            if impulse_len < 10:
                return y

            impulse = np.zeros(impulse_len)
            # Early reflection
            early = int(0.01 * sr)
            if early < impulse_len:
                impulse[early] = 0.8
            # Reverb tail
            tail_start = min(early + 1, impulse_len)
            if tail_start < impulse_len:
                tail_len = impulse_len - tail_start
                decay = np.exp(-np.linspace(0, 6, tail_len))
                impulse[tail_start:] = decay * 0.4

            # Apply convolution. mode='same' would centre the result and
            # move the reflections *ahead* of the dry sound; take the head of
            # the full convolution so the reverb only ever follows it.
            y_reverb = signal.convolve(y, impulse, mode='full')[:len(y)]

            # Normalize the wet signal to the dry signal's peak
            peak = np.max(np.abs(y_reverb))
            if peak > 0:
                y_reverb = y_reverb / peak * np.max(np.abs(y))

            # Blend
            blend_factor = 0.1 * intensity
            return y * (1 - blend_factor) + y_reverb * blend_factor
        except Exception as e:
            print(f"add_room_ambiance skipped: {e}")
            return y

    def add_analog_warmth(self, y, sr, intensity):
        """Blend in a soft-clipped, high-passed copy of the signal.

        The copy is saturated with ``tanh`` and, when ``sr`` exceeds 1000 Hz,
        passed through a 100 Hz second-order high-pass. It contributes
        ``0.15 * intensity`` of the output. Intensities below 0.1 return
        ``y`` unchanged.
        """
        if intensity < 0.1:
            return y

        try:
            # Soft clipping
            saturation = 1.0 + 0.4 * intensity
            y_warm = np.tanh(y * saturation) / saturation

            # High-pass at 100 Hz (removes content below that, it does not
            # boost lows).
            if sr > 1000:
                sos = signal.butter(2, 100, 'high', fs=sr, output='sos')
                y_warm = signal.sosfilt(sos, y_warm)

            blend_factor = 0.15 * intensity
            return y * (1 - blend_factor) + y_warm * blend_factor
        except Exception as e:
            print(f"add_analog_warmth skipped: {e}")
            return y

    def reduce_perfect_quantization(self, y, sr, intensity):
        """Modulate the amplitude with two slow sine LFOs and random noise.

        Despite the name, timing is not touched: the samples are multiplied
        by a gain that wobbles around 1.0 (0.3 Hz and 2 Hz sines plus
        per-sample Gaussian noise), all scaled by ``intensity``. Intensities
        below 0.1 return ``y`` unchanged.
        """
        if intensity < 0.1:
            return y

        t = np.arange(len(y)) / sr
        # Slow LFO for natural dynamics
        lfo1 = 1.0 + np.sin(2 * np.pi * 0.3 * t) * 0.02 * intensity
        # Faster LFO for micro-variations
        lfo2 = 1.0 + np.sin(2 * np.pi * 2.0 * t) * 0.01 * intensity
        # Random per-sample gain noise
        noise = 1.0 + np.random.normal(0, 0.005 * intensity, len(y))

        return y * (lfo1 * lfo2 * noise)
|
| 211 |
|
| 212 |
+
def humanize_song(input_audio, intensity):
    """Humanize an uploaded song and write the result to a temporary WAV file.

    Args:
        input_audio: Path of the uploaded audio file, or ``None`` when
            nothing was uploaded.
        intensity: Strength factor passed to ``AIHumanizer.humanize_audio``.

    Returns:
        A tuple ``(output_path, status)``. ``output_path`` is the path of the
        written WAV file, or ``None`` when no file was supplied or processing
        failed; ``status`` is the message to show to the user.
    """
    if input_audio is None:
        return None, "Please upload an audio file"

    humanizer = AIHumanizer()
    output_path = None

    try:
        print("Starting humanization...")

        # Process the audio
        audio_data, sr = humanizer.humanize_audio(input_audio, intensity)

        print("Processing complete. Saving audio...")

        # mkstemp creates the file itself, so the name cannot be taken by
        # another process between choosing it and writing to it (unlike the
        # deprecated tempfile.mktemp). Only the path is needed here.
        fd, output_path = tempfile.mkstemp(suffix='_humanized.wav')
        os.close(fd)

        # Handle stereo/mono properly
        if audio_data.shape[0] == 1:
            # Mono: write the single row as a 1-D signal
            sf.write(output_path, audio_data[0], sr)
        else:
            # Stereo - transpose to (samples, channels) for soundfile
            sf.write(output_path, audio_data.T, sr)

        print(f"Saved to: {output_path}")
        return output_path, "✅ Success! Your song now sounds human-made. Download below."

    except Exception as e:
        # Do not leave an empty or partial temp file behind on failure.
        if output_path is not None:
            try:
                os.remove(output_path)
            except OSError:
                pass
        error_msg = f"❌ Error: {str(e)}"
        print(f"Error details: {error_msg}")
        return None, error_msg
|
| 248 |
|
| 249 |
+
# Gradio front end: upload on the left, result and status on the right.
with gr.Blocks(theme=gr.themes.Soft(), title="AI Song Humanizer") as demo:
    gr.Markdown("""
    # 🎵 AI Song Humanizer
    **Remove AI Detection - Make Songs Sound Human-Made**

    *Upload AI song → Remove robotic sound → Download human version*
    """)

    with gr.Row():
        # Left column: input file, strength slider and the trigger button.
        with gr.Column(scale=1):
            gr.Markdown("### 1. Upload AI Song")
            input_audio = gr.Audio(
                label="Upload your AI-generated song",
                sources=["upload"],
                type="filepath",
            )

            gr.Markdown("### 2. Humanization Strength")
            intensity = gr.Slider(
                minimum=0.1,
                maximum=1.0,
                value=0.7,
                label="How much human feel to add",
            )

            process_btn = gr.Button(
                value="✨ Humanize Song",
                variant="primary",
                size="lg",
            )

        # Right column: processed audio plus a read-only status line.
        with gr.Column(scale=1):
            gr.Markdown("### 3. Download Result")
            output_audio = gr.Audio(
                type="filepath",
                label="Human-Sounding Version",
            )

            status = gr.Textbox(
                interactive=False,
                label="Status",
            )

    with gr.Accordion("📖 How It Works", open=False):
        gr.Markdown("""
        **This tool:**
        - Takes your complete AI-generated song
        - Removes robotic/AI artifacts
        - Adds natural human performance variations
        - Keeps your original music intact
        - Makes it sound like humans performed it

        **Perfect for making AI music undetectable!**
        """)

    # Button click: (file path, strength) -> (output file path, status text).
    process_btn.click(
        humanize_song,
        [input_audio, intensity],
        [output_audio, status],
    )

if __name__ == "__main__":
    demo.launch()
|