shenglisten committed on
Commit
8f9938f
·
verified ·
1 Parent(s): 4e768c8

Upload 3 files

Browse files
Files changed (3) hide show
  1. final.pt +3 -0
  2. train.yaml +120 -0
  3. units.txt +0 -0
final.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bb922e2e4620e90f2d2dc5cbcb4a0b459928941d25e4d871a010a34c633b7766
3
+ size 6705911760
train.yaml ADDED
@@ -0,0 +1,120 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ accum_grad: 8
2
+ cmvn: null
3
+ cmvn_conf:
4
+ cmvn_file: null
5
+ is_json_cmvn: null
6
+ ctc: ctc
7
+ ctc_conf:
8
+ ctc_blank_id: 50363
9
+ dataset: asr
10
+ dataset_conf:
11
+ batch_conf:
12
+ batch_size: 26
13
+ batch_type: dynamic
14
+ max_frames_in_batch: 12000
15
+ feats_type: log_mel_spectrogram
16
+ filter_conf:
17
+ max_length: 3000
18
+ min_length: 30
19
+ token_max_length: 448
20
+ token_min_length: 1
21
+ log_mel_spectrogram_conf:
22
+ hop_length: 160
23
+ n_fft: 400
24
+ num_mel_bins: 128
25
+ padding: 0
26
+ resample_conf:
27
+ resample_rate: 16000
28
+ shuffle: true
29
+ shuffle_conf:
30
+ shuffle_size: 1500
31
+ sort: true
32
+ sort_conf:
33
+ sort_size: 500
34
+ spec_aug: true
35
+ spec_aug_conf:
36
+ max_f: 10
37
+ max_t: 50
38
+ num_f_mask: 2
39
+ num_t_mask: 2
40
+ spec_sub: true
41
+ spec_sub_conf:
42
+ max_t: 30
43
+ num_t_sub: 3
44
+ spec_trim: false
45
+ speed_perturb: false
46
+ decoder: transformer
47
+ decoder_conf:
48
+ activation_type: gelu
49
+ attention_heads: 20
50
+ dropout_rate: 0.0
51
+ gradient_checkpointing: true
52
+ input_layer: embed_learnable_pe
53
+ key_bias: false
54
+ linear_units: 5120
55
+ normalize_before: true
56
+ num_blocks: 32
57
+ positional_dropout_rate: 0.0
58
+ self_attention_dropout_rate: 0.0
59
+ src_attention: true
60
+ src_attention_dropout_rate: 0.0
61
+ tie_word_embedding: true
62
+ use_output_layer: true
63
+ dtype: bf16
64
+ encoder: transformer
65
+ encoder_conf:
66
+ activation_type: gelu
67
+ attention_dropout_rate: 0.0
68
+ attention_heads: 20
69
+ dropout_rate: 0.0
70
+ gradient_checkpointing: true
71
+ input_layer: conv1d2
72
+ key_bias: false
73
+ linear_units: 5120
74
+ normalize_before: true
75
+ num_blocks: 32
76
+ output_size: 1280
77
+ pos_enc_layer_type: abs_pos_whisper
78
+ positional_dropout_rate: 0.0
79
+ static_chunk_size: -1
80
+ use_dynamic_chunk: false
81
+ use_dynamic_left_chunk: false
82
+ grad_clip: 5
83
+ input_dim: 128
84
+ log_interval: 100
85
+ max_epoch: 100
86
+ model: whisper
87
+ model_conf:
88
+ ctc_weight: 0.3
89
+ length_normalized_loss: false
90
+ lsm_weight: 0.1
91
+ model_dir:
92
+ optim: adam
93
+ optim_conf:
94
+ lr: 1.0e-05
95
+ output_dim: 51866
96
+ save_interval: 1000
97
+ save_states: model+optimizer
98
+ scheduler: warmuplr
99
+ scheduler_conf:
100
+ warmup_steps: 12000
101
+ tokenizer: whisper
102
+ tokenizer_conf:
103
+ bpe_path: null
104
+ is_multilingual: true
105
+ non_lang_syms_path: null
106
+ num_languages: 100
107
+ special_tokens:
108
+ eot: 50257
109
+ no_speech: 50363
110
+ no_timestamps: 50364
111
+ sot: 50258
112
+ sot_prev: 50362
113
+ timestamp_begin: 50365
114
+ transcribe: 50360
115
+ translate: 50359
116
+ split_with_space: false
117
+ symbol_table_path: null
118
+ train_engine: deepspeed
119
+ use_amp: false
120
+ vocab_size: 51866
units.txt ADDED
The diff for this file is too large to render. See raw diff