# FunASRNano training configuration
# NOTE(review): the source arrived with all indentation stripped, which
# flattened every nested section to the document root and created duplicate
# top-level keys (freeze, ffn_dim, downsample_rate, init_param_path, ...).
# The hierarchy below is reconstructed from the `X:` / `X_conf:` key pairing
# and the FunASR config convention — verify against the trainer's schema.
# `${...}` values are OmegaConf-style interpolations resolved at load time.
---
model: FunASRNano
model_conf:
  lsm_weight: 0.1
  length_normalized_loss: true

# --- audio encoder -------------------------------------------------------
audio_encoder: SenseVoiceEncoderSmall
audio_encoder_conf:
  output_size: 512
  attention_heads: 4
  linear_units: 2048
  num_blocks: 50
  tp_blocks: 20
  dropout_rate: 0.1
  positional_dropout_rate: 0.1
  attention_dropout_rate: 0.1
  input_layer: pe
  pos_enc_class: SinusoidalPositionEncoder
  normalize_before: true
  kernel_size: 11
  # "sanm_shfit" spelling kept as-is — it must match the consumer's key name.
  sanm_shfit: 0
  selfattention_layer_type: sanm
  freeze: true
  # -1 presumably means "freeze all layers" — TODO confirm in trainer code.
  freeze_layer_num: -1
  feat_permute: true

# --- LLM backbone --------------------------------------------------------
llm: Qwen3-0.6b
llm_conf:
  hub: hf
  freeze: true
  llm_dtype: bf16
  init_param_path: Qwen3-0.6B
  use_lora: false
  lora_conf:
    freeze_lora: true
    task_type: CAUSAL_LM
    r: 16
    lora_alpha: 32
    lora_dropout: 0.05
    # "none" is a plain YAML string (not null) — PEFT bias mode.
    bias: none
    target_modules:
      - q_proj
      - v_proj
    init_param_path: ""

# --- audio -> LLM adaptor ------------------------------------------------
audio_adaptor: Transformer
audio_adaptor_conf:
  downsample_rate: 1
  use_low_frame_rate: true
  ffn_dim: 2048
  llm_dim: 1024
  encoder_dim: 512
  n_layer: 2
  freeze: true

# --- auxiliary CTC branch ------------------------------------------------
ctc_decoder: Transformer
detach_ctc_decoder: true
ctc_decoder_conf:
  downsample_rate: 1
  ffn_dim: 2048
  llm_dim: 512
  encoder_dim: 512
  n_layer: 5
  freeze: false
ctc_weight: 1.0
ctc_conf:
  dropout_rate: 0.0
  ctc_type: builtin
  reduce: true
  ignore_nan_grad: true

# --- feature frontend ----------------------------------------------------
frontend: WavFrontend
frontend_conf:
  fs: 16000
  window: hamming
  n_mels: 80
  frame_length: 25
  frame_shift: 10
  # low-frame-rate stacking: 7 frames stacked, stride 6
  lfr_m: 7
  lfr_n: 6
  cmvn_file: null

# --- training ------------------------------------------------------------
train_conf:
  use_lora: ${llm_conf.use_lora}
  accum_grad: 1
  grad_clip: 5
  max_epoch: 2
  keep_nbest_models: 200
  log_interval: 100
  # checkpoint keys matching these prefixes are excluded from saving
  effective_save_name_excludes:
    - llm.
  resume: true
  validate_interval: 2000
  save_checkpoint_interval: 2000
  avg_nbest_model: 100
  use_bf16: false
  use_deepspeed: true
  deepspeed_config: null
  save_init_model: false

optim: adamw
optim_conf:
  lr: 5.0e-06
  weight_decay: 0.0
scheduler: warmuplr
scheduler_conf:
  warmup_steps: 2500

# --- dataset -------------------------------------------------------------
dataset: FunASR
dataset_conf:
  index_ds: FunASR
  batch_sampler: BatchSampler
  # token-based batching: batch_size is a token budget, not a sample count
  batch_type: token
  batch_size: 6000
  max_token_length: 3500
  shuffle: true
  sort_size: 1024
  batch_size_scale_ratio_max: 2
  num_workers: 4
  audio_adaptor_downsample_rate: ${audio_adaptor_conf.downsample_rate}
  audio_encoder_downsample_rate: 6
  data_split_num: 256
  batch_size_sample_max: 10
  retry: 2000
  batch_size_token_max: 6000
  max_source_length: 12000
  max_target_length: 2048
  prompt_classes: MultiContextPrompt
  prompt_conf:
    max_neg_hotwords_num: 0
    min_neg_hotwords_num: 0
    use_hist: false
    use_one_pass_result: true
    use_hotwords: true
    use_asr_hotwords: true
    chinese_hotwords_list: null
    english_hotwords_list: null
  ctc_tokenizer: SenseVoiceTokenizer
  ctc_target_normalize: true
  ctc_tokenizer_conf:
    vocab_path: null
    is_multilingual: true
    num_languages: 8749
  min_source_length: 10
  batch_size_scale_threshold: 3000
  use_dynamic_output_ratio: 0.0

# --- LLM tokenizer -------------------------------------------------------
tokenizer: HuggingfaceTokenizer
tokenizer_conf:
  # reuse the LLM checkpoint path so tokenizer and model stay in sync
  init_param_path: ${llm_conf.init_param_path}

# --- misc / runtime ------------------------------------------------------
enable_tf32: true
debug: false
train_data_set_list: null
valid_data_set_list: null
init_param: null
output_dir: null