Commit
·
4036c77
1
Parent(s):
8fb9ccc
get rid of fairseq requirement
Browse files
- app.py +47 -48
- requirements.txt +0 -4
app.py
CHANGED
|
@@ -19,7 +19,6 @@ from subprocess import Popen
|
|
| 19 |
import faiss
|
| 20 |
from random import shuffle
|
| 21 |
import json, datetime, requests
|
| 22 |
-
from gtts import gTTS
|
| 23 |
now_dir = os.getcwd()
|
| 24 |
sys.path.append(now_dir)
|
| 25 |
tmp = os.path.join(now_dir, "TEMP")
|
|
@@ -1426,53 +1425,53 @@ def stoptraining(mim):
|
|
| 1426 |
)
|
| 1427 |
|
| 1428 |
|
| 1429 |
-
def elevenTTS(xiapi, text, id, lang):
|
| 1430 |
-
|
| 1431 |
-
|
| 1432 |
-
|
| 1433 |
-
|
| 1434 |
-
|
| 1435 |
-
|
| 1436 |
-
|
| 1437 |
-
|
| 1438 |
-
|
| 1439 |
-
|
| 1440 |
-
|
| 1441 |
-
|
| 1442 |
-
|
| 1443 |
-
|
| 1444 |
-
|
| 1445 |
-
|
| 1446 |
-
|
| 1447 |
-
|
| 1448 |
-
|
| 1449 |
-
|
| 1450 |
-
|
| 1451 |
-
|
| 1452 |
-
|
| 1453 |
-
|
| 1454 |
-
|
| 1455 |
-
|
| 1456 |
-
|
| 1457 |
-
|
| 1458 |
-
|
| 1459 |
-
|
| 1460 |
-
|
| 1461 |
-
|
| 1462 |
-
|
| 1463 |
-
|
| 1464 |
-
|
| 1465 |
-
|
| 1466 |
-
|
| 1467 |
-
|
| 1468 |
-
|
| 1469 |
-
|
| 1470 |
-
|
| 1471 |
-
def ilariaTTS(text, ttsvoice):
|
| 1472 |
-
|
| 1473 |
-
|
| 1474 |
-
|
| 1475 |
-
|
| 1476 |
|
| 1477 |
def transcribe_btn_click(audio_choice):
|
| 1478 |
batch_size = 1 # Adjust based on your GPU memory availability
|
|
|
|
| 19 |
import faiss
|
| 20 |
from random import shuffle
|
| 21 |
import json, datetime, requests
|
|
|
|
| 22 |
now_dir = os.getcwd()
|
| 23 |
sys.path.append(now_dir)
|
| 24 |
tmp = os.path.join(now_dir, "TEMP")
|
|
|
|
| 1425 |
)
|
| 1426 |
|
| 1427 |
|
| 1428 |
+
# def elevenTTS(xiapi, text, id, lang):
|
| 1429 |
+
# if xiapi!= '' and id !='':
|
| 1430 |
+
# choice = chosen_voice[id]
|
| 1431 |
+
# CHUNK_SIZE = 1024
|
| 1432 |
+
# url = f"https://api.elevenlabs.io/v1/text-to-speech/{choice}"
|
| 1433 |
+
# headers = {
|
| 1434 |
+
# "Accept": "audio/mpeg",
|
| 1435 |
+
# "Content-Type": "application/json",
|
| 1436 |
+
# "xi-api-key": xiapi
|
| 1437 |
+
# }
|
| 1438 |
+
# if lang == 'en':
|
| 1439 |
+
# data = {
|
| 1440 |
+
# "text": text,
|
| 1441 |
+
# "model_id": "eleven_monolingual_v1",
|
| 1442 |
+
# "voice_settings": {
|
| 1443 |
+
# "stability": 0.5,
|
| 1444 |
+
# "similarity_boost": 0.5
|
| 1445 |
+
# }
|
| 1446 |
+
# }
|
| 1447 |
+
# else:
|
| 1448 |
+
# data = {
|
| 1449 |
+
# "text": text,
|
| 1450 |
+
# "model_id": "eleven_multilingual_v1",
|
| 1451 |
+
# "voice_settings": {
|
| 1452 |
+
# "stability": 0.5,
|
| 1453 |
+
# "similarity_boost": 0.5
|
| 1454 |
+
# }
|
| 1455 |
+
# }
|
| 1456 |
+
|
| 1457 |
+
# response = requests.post(url, json=data, headers=headers)
|
| 1458 |
+
# with open('./temp_eleven.mp3', 'wb') as f:
|
| 1459 |
+
# for chunk in response.iter_content(chunk_size=CHUNK_SIZE):
|
| 1460 |
+
# if chunk:
|
| 1461 |
+
# f.write(chunk)
|
| 1462 |
+
# aud_path = save_to_wav('./temp_eleven.mp3')
|
| 1463 |
+
# return aud_path, aud_path
|
| 1464 |
+
# else:
|
| 1465 |
+
# tts = gTTS(text, lang=lang)
|
| 1466 |
+
# tts.save('./temp_gTTS.mp3')
|
| 1467 |
+
# aud_path = save_to_wav('./temp_gTTS.mp3')
|
| 1468 |
+
# return aud_path, aud_path
|
| 1469 |
+
|
| 1470 |
+
# def ilariaTTS(text, ttsvoice):
|
| 1471 |
+
# vo=language_dict[ttsvoice]
|
| 1472 |
+
# asyncio.run(edge_tts.Communicate(text, vo).save("./temp_ilaria.mp3"))
|
| 1473 |
+
# aud_path = save_to_wav('./temp_ilaria.mp3')
|
| 1474 |
+
# return aud_path, aud_path
|
| 1475 |
|
| 1476 |
def transcribe_btn_click(audio_choice):
|
| 1477 |
batch_size = 1 # Adjust based on your GPU memory availability
|
requirements.txt
CHANGED
|
@@ -1,6 +1,3 @@
|
|
| 1 |
-
gTTS
|
| 2 |
-
elevenlabs
|
| 3 |
-
edge-tts
|
| 4 |
stftpitchshift==1.5.1
|
| 5 |
torchcrepe
|
| 6 |
setuptools
|
|
@@ -8,7 +5,6 @@ wheel
|
|
| 8 |
whisper
|
| 9 |
httpx==0.23.0
|
| 10 |
faiss-gpu
|
| 11 |
-
fairseq==0.12.2
|
| 12 |
gradio==3.34.0
|
| 13 |
ffmpeg-python
|
| 14 |
praat-parselmouth
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
stftpitchshift==1.5.1
|
| 2 |
torchcrepe
|
| 3 |
setuptools
|
|
|
|
| 5 |
whisper
|
| 6 |
httpx==0.23.0
|
| 7 |
faiss-gpu
|
|
|
|
| 8 |
gradio==3.34.0
|
| 9 |
ffmpeg-python
|
| 10 |
praat-parselmouth
|