Spaces:
Running
Running
audionar
Browse files
app.py
CHANGED
|
@@ -11,29 +11,16 @@ from transformers import Wav2Vec2Processor
|
|
| 11 |
from transformers.models.wav2vec2.modeling_wav2vec2 import Wav2Vec2Model
|
| 12 |
from transformers.models.wav2vec2.modeling_wav2vec2 import Wav2Vec2PreTrainedModel
|
| 13 |
import audiofile
|
| 14 |
-
import unicodedata
|
| 15 |
-
import textwrap
|
| 16 |
from tts import StyleTTS2
|
| 17 |
import audresample
|
| 18 |
-
|
| 19 |
-
# --
|
| 20 |
-
# -*- coding: utf-8 -*-
|
| 21 |
-
|
| 22 |
-
# https://huggingface.co/spaces/dpc/mmstts/tree/main
|
| 23 |
-
# https://huggingface.co/spaces/mms-meta/MMS/blob/main/tts.py
|
| 24 |
-
|
| 25 |
import json
|
| 26 |
-
import soundfile
|
| 27 |
import re
|
| 28 |
import unicodedata
|
| 29 |
-
import gradio as gr
|
| 30 |
import textwrap
|
| 31 |
-
import numpy as np
|
| 32 |
-
import torch
|
| 33 |
import nltk
|
| 34 |
from num2words import num2words
|
| 35 |
from num2word_greek.numbers2words import convert_numbers
|
| 36 |
-
from
|
| 37 |
|
| 38 |
nltk.download('punkt', download_dir='./')
|
| 39 |
nltk.download('punkt_tab', download_dir='./')
|
|
@@ -534,14 +521,14 @@ def audionar_tts(text=None,
|
|
| 534 |
lang_code=lang_code,
|
| 535 |
)[0, :]
|
| 536 |
total_audio.append(x)
|
| 537 |
-
|
| 538 |
print(f'\n\n_______________________________ {_t} {x.shape=}')
|
| 539 |
|
| 540 |
x = torch.cat(total_audio).cpu().numpy()
|
| 541 |
-
|
| 542 |
tmp_file = f'_speech.wav'
|
| 543 |
-
|
| 544 |
-
|
| 545 |
|
| 546 |
return tmp_file
|
| 547 |
|
|
|
|
| 11 |
from transformers.models.wav2vec2.modeling_wav2vec2 import Wav2Vec2Model
|
| 12 |
from transformers.models.wav2vec2.modeling_wav2vec2 import Wav2Vec2PreTrainedModel
|
| 13 |
import audiofile
|
|
|
|
|
|
|
| 14 |
from tts import StyleTTS2
|
| 15 |
import audresample
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
import json
|
|
|
|
| 17 |
import re
|
| 18 |
import unicodedata
|
|
|
|
| 19 |
import textwrap
|
|
|
|
|
|
|
| 20 |
import nltk
|
| 21 |
from num2words import num2words
|
| 22 |
from num2word_greek.numbers2words import convert_numbers
|
| 23 |
+
from audionar import VitsModel, VitsTokenizer
|
| 24 |
|
| 25 |
nltk.download('punkt', download_dir='./')
|
| 26 |
nltk.download('punkt_tab', download_dir='./')
|
|
|
|
| 521 |
lang_code=lang_code,
|
| 522 |
)[0, :]
|
| 523 |
total_audio.append(x)
|
| 524 |
+
|
| 525 |
print(f'\n\n_______________________________ {_t} {x.shape=}')
|
| 526 |
|
| 527 |
x = torch.cat(total_audio).cpu().numpy()
|
| 528 |
+
|
| 529 |
tmp_file = f'_speech.wav'
|
| 530 |
+
|
| 531 |
+
audiofile.write(tmp_file, x, 16000)
|
| 532 |
|
| 533 |
return tmp_file
|
| 534 |
|