ajayarora1235 committed on
Commit
4036c77
·
1 Parent(s): 8fb9ccc

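Get rid of the fairseq requirement; also drop the gTTS, elevenlabs, and edge-tts dependencies and comment out the elevenTTS/ilariaTTS helpers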

Browse files
Files changed (2) hide show
  1. app.py +47 -48
  2. requirements.txt +0 -4
app.py CHANGED
@@ -19,7 +19,6 @@ from subprocess import Popen
19
  import faiss
20
  from random import shuffle
21
  import json, datetime, requests
22
- from gtts import gTTS
23
  now_dir = os.getcwd()
24
  sys.path.append(now_dir)
25
  tmp = os.path.join(now_dir, "TEMP")
@@ -1426,53 +1425,53 @@ def stoptraining(mim):
1426
  )
1427
 
1428
 
1429
- def elevenTTS(xiapi, text, id, lang):
1430
- if xiapi!= '' and id !='':
1431
- choice = chosen_voice[id]
1432
- CHUNK_SIZE = 1024
1433
- url = f"https://api.elevenlabs.io/v1/text-to-speech/{choice}"
1434
- headers = {
1435
- "Accept": "audio/mpeg",
1436
- "Content-Type": "application/json",
1437
- "xi-api-key": xiapi
1438
- }
1439
- if lang == 'en':
1440
- data = {
1441
- "text": text,
1442
- "model_id": "eleven_monolingual_v1",
1443
- "voice_settings": {
1444
- "stability": 0.5,
1445
- "similarity_boost": 0.5
1446
- }
1447
- }
1448
- else:
1449
- data = {
1450
- "text": text,
1451
- "model_id": "eleven_multilingual_v1",
1452
- "voice_settings": {
1453
- "stability": 0.5,
1454
- "similarity_boost": 0.5
1455
- }
1456
- }
1457
-
1458
- response = requests.post(url, json=data, headers=headers)
1459
- with open('./temp_eleven.mp3', 'wb') as f:
1460
- for chunk in response.iter_content(chunk_size=CHUNK_SIZE):
1461
- if chunk:
1462
- f.write(chunk)
1463
- aud_path = save_to_wav('./temp_eleven.mp3')
1464
- return aud_path, aud_path
1465
- else:
1466
- tts = gTTS(text, lang=lang)
1467
- tts.save('./temp_gTTS.mp3')
1468
- aud_path = save_to_wav('./temp_gTTS.mp3')
1469
- return aud_path, aud_path
1470
-
1471
- def ilariaTTS(text, ttsvoice):
1472
- vo=language_dict[ttsvoice]
1473
- asyncio.run(edge_tts.Communicate(text, vo).save("./temp_ilaria.mp3"))
1474
- aud_path = save_to_wav('./temp_ilaria.mp3')
1475
- return aud_path, aud_path
1476
 
1477
  def transcribe_btn_click(audio_choice):
1478
  batch_size = 1 # Adjust based on your GPU memory availability
 
19
  import faiss
20
  from random import shuffle
21
  import json, datetime, requests
 
22
  now_dir = os.getcwd()
23
  sys.path.append(now_dir)
24
  tmp = os.path.join(now_dir, "TEMP")
 
1425
  )
1426
 
1427
 
1428
+ # def elevenTTS(xiapi, text, id, lang):
1429
+ # if xiapi!= '' and id !='':
1430
+ # choice = chosen_voice[id]
1431
+ # CHUNK_SIZE = 1024
1432
+ # url = f"https://api.elevenlabs.io/v1/text-to-speech/{choice}"
1433
+ # headers = {
1434
+ # "Accept": "audio/mpeg",
1435
+ # "Content-Type": "application/json",
1436
+ # "xi-api-key": xiapi
1437
+ # }
1438
+ # if lang == 'en':
1439
+ # data = {
1440
+ # "text": text,
1441
+ # "model_id": "eleven_monolingual_v1",
1442
+ # "voice_settings": {
1443
+ # "stability": 0.5,
1444
+ # "similarity_boost": 0.5
1445
+ # }
1446
+ # }
1447
+ # else:
1448
+ # data = {
1449
+ # "text": text,
1450
+ # "model_id": "eleven_multilingual_v1",
1451
+ # "voice_settings": {
1452
+ # "stability": 0.5,
1453
+ # "similarity_boost": 0.5
1454
+ # }
1455
+ # }
1456
+
1457
+ # response = requests.post(url, json=data, headers=headers)
1458
+ # with open('./temp_eleven.mp3', 'wb') as f:
1459
+ # for chunk in response.iter_content(chunk_size=CHUNK_SIZE):
1460
+ # if chunk:
1461
+ # f.write(chunk)
1462
+ # aud_path = save_to_wav('./temp_eleven.mp3')
1463
+ # return aud_path, aud_path
1464
+ # else:
1465
+ # tts = gTTS(text, lang=lang)
1466
+ # tts.save('./temp_gTTS.mp3')
1467
+ # aud_path = save_to_wav('./temp_gTTS.mp3')
1468
+ # return aud_path, aud_path
1469
+
1470
+ # def ilariaTTS(text, ttsvoice):
1471
+ # vo=language_dict[ttsvoice]
1472
+ # asyncio.run(edge_tts.Communicate(text, vo).save("./temp_ilaria.mp3"))
1473
+ # aud_path = save_to_wav('./temp_ilaria.mp3')
1474
+ # return aud_path, aud_path
1475
 
1476
  def transcribe_btn_click(audio_choice):
1477
  batch_size = 1 # Adjust based on your GPU memory availability
requirements.txt CHANGED
@@ -1,6 +1,3 @@
1
- gTTS
2
- elevenlabs
3
- edge-tts
4
  stftpitchshift==1.5.1
5
  torchcrepe
6
  setuptools
@@ -8,7 +5,6 @@ wheel
8
  whisper
9
  httpx==0.23.0
10
  faiss-gpu
11
- fairseq==0.12.2
12
  gradio==3.34.0
13
  ffmpeg-python
14
  praat-parselmouth
 
 
 
 
1
  stftpitchshift==1.5.1
2
  torchcrepe
3
  setuptools
 
5
  whisper
6
  httpx==0.23.0
7
  faiss-gpu
 
8
  gradio==3.34.0
9
  ffmpeg-python
10
  praat-parselmouth