cndn
committed on
Commit
·
bc174ff
1
Parent(s):
0ffb909
adjust
Browse files
- README.md +9 -0
- config.yaml +7 -0
- example_config.yaml +0 -40
- example_readme.md +0 -12
README.md
CHANGED
|
@@ -1,3 +1,12 @@
|
|
| 1 |
---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
license: cc-by-4.0
|
| 3 |
---
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
+
library_name: fairseq
|
| 3 |
+
task: audio-to-audio
|
| 4 |
+
tags:
|
| 5 |
+
- fairseq
|
| 6 |
+
- audio
|
| 7 |
+
- audio-to-audio
|
| 8 |
+
- speech-to-speech-translation
|
| 9 |
license: cc-by-4.0
|
| 10 |
---
|
| 11 |
+
You can try out the model on the right of the page by uploading or recording.
|
| 12 |
+
For model usage, please refer to https://huggingface.co/facebook/textless_sm_cs_en
|
config.yaml
CHANGED
|
@@ -31,3 +31,10 @@ vocoder:
|
|
| 31 |
model_path: N/A
|
| 32 |
speaker: false
|
| 33 |
type: code_hifigan
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
model_path: N/A
|
| 32 |
speaker: false
|
| 33 |
type: code_hifigan
|
| 34 |
+
hub:
|
| 35 |
+
input_type: fbank80_w_utt_cmvn
|
| 36 |
+
tts_model_id: pytorch/fairseq:ust:unit_hifigan_mhubert_vp_en_es_fr_it3_400k_layer11_km1000_lj_dur
|
| 37 |
+
unit_vocoder: true
|
| 38 |
+
generation_args:
|
| 39 |
+
beam: 10
|
| 40 |
+
max_len_a: 1
|
example_config.yaml
DELETED
|
@@ -1,40 +0,0 @@
|
|
| 1 |
-
data_root: N/A
|
| 2 |
-
input_channels: 1
|
| 3 |
-
input_feat_per_channel: 80
|
| 4 |
-
multitask:
|
| 5 |
-
source_unit:
|
| 6 |
-
data: N/A
|
| 7 |
-
decoder_type: transformer
|
| 8 |
-
dict: N/A
|
| 9 |
-
encoder_layer: 6
|
| 10 |
-
loss_weight: 8.0
|
| 11 |
-
target_type: text
|
| 12 |
-
output_channels: 1
|
| 13 |
-
output_feat_per_channel: 1
|
| 14 |
-
output_feat_reduction_rate: 0
|
| 15 |
-
output_sample_rate: 16000
|
| 16 |
-
specaugment:
|
| 17 |
-
freq_mask_F: 27
|
| 18 |
-
freq_mask_N: 1
|
| 19 |
-
time_mask_N: 1
|
| 20 |
-
time_mask_T: 100
|
| 21 |
-
time_mask_p: 1.0
|
| 22 |
-
time_wrap_W: 0
|
| 23 |
-
transforms:
|
| 24 |
-
_eval:
|
| 25 |
-
- utterance_cmvn
|
| 26 |
-
_train:
|
| 27 |
-
- utterance_cmvn
|
| 28 |
-
- specaugment
|
| 29 |
-
vocoder:
|
| 30 |
-
dur_prediction: true
|
| 31 |
-
model_path: N/A
|
| 32 |
-
speaker: false
|
| 33 |
-
type: code_hifigan
|
| 34 |
-
hub:
|
| 35 |
-
input_type: fbank80_w_utt_cmvn
|
| 36 |
-
tts_model_id: pytorch/fairseq:ust:unit_hifigan_mhubert_vp_en_es_fr_it3_400k_layer11_km1000_lj_dur
|
| 37 |
-
unit_vocoder: true
|
| 38 |
-
generation_args:
|
| 39 |
-
beam: 10
|
| 40 |
-
max_len_a: 1
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
example_readme.md
DELETED
|
@@ -1,12 +0,0 @@
|
|
| 1 |
-
---
|
| 2 |
-
library_name: fairseq
|
| 3 |
-
task: audio-to-audio
|
| 4 |
-
tags:
|
| 5 |
-
- fairseq
|
| 6 |
-
- audio
|
| 7 |
-
- audio-to-audio
|
| 8 |
-
- speech-to-speech-translation
|
| 9 |
-
license: cc-by-4.0
|
| 10 |
-
---
|
| 11 |
-
You can try out the model on the right of the page by uploading or recording.
|
| 12 |
-
For model usage, please refer to https://huggingface.co/facebook/textless_sm_cs_en
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|