xxx123456 committed on
Commit
8aad9db
·
verified ·
1 Parent(s): a015fc6

Upload SimWhisperCodec.yaml

Browse files
Files changed (1) hide show
  1. SimWhisperCodec.yaml +76 -0
SimWhisperCodec.yaml ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ generator_params:
2
+ input_sample_rate: 16000
3
+ output_sample_rate: 16000
4
+ mel_hop_length: 160
5
+ encoder_downsample_rate: 1280
6
+ decoder_upsample_rate: 1280
7
+
8
+ feature_extractor:
9
+ chunk_length: 30
10
+ feature_size: 80
11
+ sampling_rate: 16000
12
+ hop_length: 160
13
+ n_fft: 400
14
+ n_samples: 480000
15
+ nb_max_frames: 3000
16
+ padding_side: "right"
17
+ padding_value: 0.0
18
+ return_attention_mask: false
19
+
20
+ acoustic_encoder:
21
+ num_mel_bins: 80
22
+ sampling_rate: 16000
23
+ hop_length: 160
24
+ stride_size: 2
25
+ kernel_size: 3
26
+ d_model: 768
27
+ scale_embedding: false
28
+ max_audio_seconds: 30
29
+ encoder_layers: 12
30
+ encoder_attention_heads: 12
31
+ encoder_ffn_dim: 3072
32
+ is_acoustic: true
33
+ freeze: true
34
+
35
+ # Downsampling
36
+ downsample:
37
+ in_dim: 768
38
+ latent_dim: 32
39
+ stack_factor: 4
40
+ hidden_dim: 512
41
+
42
+ # GroupFSQ quantizer
43
+ quantizer:
44
+ num_groups: 8
45
+ num_levels_per_group: [8, 7, 6, 6]
46
+ eps: 0.001
47
+
48
+ # Upsampling
49
+ upsample:
50
+ latent_dim: 32
51
+ out_dim: 768
52
+ stack_factor: 4
53
+ hidden_dim: 512
54
+
55
+ acoustic_decoder:
56
+ num_mel_bins: 80
57
+ sampling_rate: 16000
58
+ hop_length: 160
59
+ stride_size: 2
60
+ kernel_size: 3
61
+ d_model: 768
62
+ scale_embedding: false
63
+ max_audio_seconds: 30
64
+ decoder_layers: 12
65
+ decoder_attention_heads: 12
66
+ decoder_ffn_dim: 3072
67
+ activation_function: "gelu"
68
+
69
+ vocos:
70
+ input_channels: 80
71
+ dim: 512
72
+ intermediate_dim: 4096
73
+ num_layers: 24
74
+ n_fft: 640
75
+ hop_size: 160
76
+ padding: "same"