hahunavth committed on
Commit
2e76d6a
·
verified ·
1 Parent(s): dfa623d

Upload model

Browse files
Files changed (2) hide show
  1. config.json +99 -0
  2. model.safetensors +3 -0
config.json ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "ESSModel"
4
+ ],
5
+ "model": {
6
+ "conformer": {
7
+ "attention_dropout_p": 0.2,
8
+ "conv_dropout_p": 0.2,
9
+ "conv_expansion_factor": 2,
10
+ "conv_kernel_size": 7,
11
+ "decoder_dim": 256,
12
+ "encoder_dim": 256,
13
+ "feed_forward_dropout_p": 0.2,
14
+ "feed_forward_expansion_factor": 4,
15
+ "half_step_residual": true,
16
+ "num_attention_heads": 2,
17
+ "num_decode_layers": 6,
18
+ "num_encode_layers": 4
19
+ },
20
+ "max_seq_len": 1000,
21
+ "mode": "train",
22
+ "num_emotion": 10,
23
+ "reference_encoder": {
24
+ "dropout": 0.2,
25
+ "encoder_dim": 128
26
+ },
27
+ "variance_embedding": {
28
+ "energy_quantization": "linear",
29
+ "n_bins": 256,
30
+ "pitch_quantization": "linear"
31
+ },
32
+ "variance_predictor": {
33
+ "dropout": 0.5,
34
+ "filter_size": 256,
35
+ "kernel_size": 3
36
+ },
37
+ "vocoder": {
38
+ "model": "HiFi-GAN",
39
+ "speaker": "tth"
40
+ }
41
+ },
42
+ "model_type": "emofs2",
43
+ "preprocess": {
44
+ "dataset": "vlsp2023emo",
45
+ "emotion2id": {
46
+ "angry": 3,
47
+ "happy": 1,
48
+ "neutral": 0,
49
+ "sad": 2,
50
+ "surprise": 4
51
+ },
52
+ "id2emotion": {
53
+ "0": "neutral",
54
+ "1": "happy",
55
+ "2": "sad",
56
+ "3": "angry",
57
+ "4": "surprise"
58
+ },
59
+ "path": {
60
+ "corpus_path": "./data/pretrained_tts_dataset/tuyendv.dict",
61
+ "lexicon_path": "../datasets/ess-vlsp2023-lexicon/lexicon.dict",
62
+ "preprocessed_path": "../datasets/ess-vlsp2023-emo-processed-phoneme-level",
63
+ "raw_path": "./data/pretrained_tts_dataset_raw"
64
+ },
65
+ "preprocessing": {
66
+ "audio": {
67
+ "max_wav_value": 32768.0,
68
+ "sampling_rate": 22050
69
+ },
70
+ "energy": {
71
+ "feature": "phoneme_level",
72
+ "normalization": true
73
+ },
74
+ "mel": {
75
+ "mel_fmax": 8000,
76
+ "mel_fmin": 0,
77
+ "n_mel_channels": 80
78
+ },
79
+ "pitch": {
80
+ "feature": "phoneme_level",
81
+ "normalization": true
82
+ },
83
+ "stft": {
84
+ "filter_length": 1024,
85
+ "hop_length": 256,
86
+ "win_length": 1024
87
+ },
88
+ "text": {
89
+ "language": "en",
90
+ "text_cleaners": []
91
+ },
92
+ "val_size": 512
93
+ },
94
+ "smoothing_label": 0.1
95
+ },
96
+ "torch_dtype": "float32",
97
+ "train": null,
98
+ "transformers_version": "4.35.2"
99
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d47faa7882bb93e619fc5f1c65a13f373e84d921a11e45f3e1b318f0874619be
3
+ size 5402404