| model: FunCineForgeLM |
| model_conf: |
| lsm_weight: 0.0 |
| length_normalized_loss: true |
| codec_unit: 6761 |
| timespk_unit: 1550 |
| face_size: 512 |
| llm: Qwen2-0.5B |
| llm_conf: |
| hub: hf |
| freeze: false |
| llm_dtype: fp32 |
| init_param_path: ../tokenizer/Qwen2-0.5B-CosyVoice-BlankEN |
| use_lora: false |
| lora_conf: |
| task_type: CAUSAL_LM |
| r: 16 |
| lora_alpha: 32 |
| lora_dropout: 0.05 |
| bias: none |
| target_modules: |
| - q_proj |
| - v_proj |
| train_conf: |
| use_lora: ${llm_conf.use_lora} |
| accum_grad: 1 |
| grad_clip: 5 |
| max_epoch: 200 |
| log_interval: 100 |
| effective_save_name_excludes: |
| - none |
| resume: true |
| validate_interval: 5000 |
| save_checkpoint_interval: 5000 |
| keep_nbest_models: 100000 |
| avg_nbest_model: 5 |
| use_bf16: false |
| save_init_model: false |
| loss_rescale_by_rank: false |
| use_deepspeed: true |
| deepspeed_config: decode_conf/ds_stage0_fp32.json |
| optim: adamw |
| optim_conf: |
| lr: 8.0e-05 |
| scheduler: warmuplr |
| scheduler_conf: |
| warmup_steps: 2000 |
| dataset: FunCineForgeDataset |
| dataset_conf: |
| use_emotion_clue: true |
| codebook_size: 6561 |
| sos: 6561 |
| eos: 6562 |
| turn_of_speech: 6563 |
| fill_token: 6564 |
| ignore_id: -100 |
| startofclue_token: 151646 |
| endofclue_token: 151647 |
| frame_shift: 25 |
| timebook_size: 1500 |
| pangbai: 1500 |
| dubai: 1501 |
| duihua: 1502 |
| duoren: 1503 |
| male: 1504 |
| female: 1505 |
| child: 1506 |
| youth: 1507 |
| adult: 1508 |
| middle: 1509 |
| elderly: 1510 |
| speaker_id_start: 1511 |
| index_ds: CosyVoice |
| dataloader: DataloaderMapStyle |
| load_meta_data_key: text,clue,token,face,dialogue |
| data_split_num: 1 |
| batch_sampler: BatchSampler |
| shuffle: true |
| sort_size: 512 |
| face_size: 512 |
| batch_type: token |
| batch_size: 3000 |
| batch_size_token_max: 20000 |
| batch_size_sample_max: 100 |
| max_token_length: 5000 |
| max_text_length: 300 |
| batch_size_scale_threshold: 3000 |
| num_workers: 20 |
| retry: 100 |
| specaug: FunCineForgeSpecAug |
| specaug_conf: |
| apply_time_warp: false |
| apply_freq_mask: false |
| apply_time_mask: true |
| time_mask_width_ratio_range: |
| - 0 |
| - 0.05 |
| num_time_mask: 10 |
| fill_value: -100 |
| tokenizer: FunCineForgeTokenizer |
| tokenizer_conf: |
| init_param_path: ${llm_conf.init_param_path} |
| face_encoder: FaceRecIR101 |
| face_encoder_conf: |
| init_param_path: ../speaker_diarization/pretrained_models/face_recog_ir101.onnx |
| enable_tf32: true |
| debug: false |
| train_data_set_list: /nfs/yanzhang.ljx/workspace/datasets/YingShi/clean/train.jsonl |
| valid_data_set_list: /nfs/yanzhang.ljx/workspace/datasets/YingShi/clean/test.jsonl |
| output_dir: /cpfs_fundata/yanzhang.ljx/workspace/exps/1m-8gpu/zh_en |
| init_param: /nfs/hengwu.zty/exps/4m-8gpu/CosyVoice_MixedAM_5b15_Qwen2_500M_phn_fp32_fsq6561_simple_sys_minmo_l12_merge_cosyvoice3d5_baiyinku_emilia_yodas2_0605/ds-model.pt.ep0.290000/mp_rank_00_model_states.pt |
| device: cpu |
|
|