# Training configuration for the FunCineForgeLM model.
# NOTE(review): the block structure below was restored from a copy whose line
# breaks and indentation had been collapsed; nesting follows the `X` / `X_conf`
# pairing and the `${llm_conf.*}` references — confirm against the consumer.

# ---- Model ----
model: FunCineForgeLM
model_conf:
  lsm_weight: 0.0
  length_normalized_loss: true
  # NOTE(review): codec_unit (6761) differs from dataset_conf.codebook_size
  # (6561), and timespk_unit (1550) from dataset_conf.timebook_size (1500);
  # presumably the extra slots hold special/speaker tokens — confirm.
  codec_unit: 6761
  timespk_unit: 1550
  # Same value as dataset_conf.face_size; keep the two in sync.
  face_size: 512

# ---- Language-model backbone ----
llm: Qwen2-0.5B
llm_conf:
  hub: hf
  freeze: false
  llm_dtype: fp32
  # Relative path; also referenced by tokenizer_conf.init_param_path.
  init_param_path: ../tokenizer/Qwen2-0.5B-CosyVoice-BlankEN
  # Also referenced by train_conf.use_lora.
  use_lora: false
  lora_conf:
    task_type: CAUSAL_LM
    r: 16
    lora_alpha: 32
    lora_dropout: 0.05
    # Plain scalar `none` is the string "none", not a YAML null.
    bias: none
    target_modules:
      - q_proj
      - v_proj

# ---- Training loop ----
train_conf:
  # Interpolation: takes its value from llm_conf.use_lora
  # (presumably resolved by an OmegaConf-style loader — confirm).
  use_lora: ${llm_conf.use_lora}
  accum_grad: 1
  grad_clip: 5
  max_epoch: 200
  log_interval: 100
  effective_save_name_excludes:
    # The string "none", not a YAML null.
    - none
  resume: true
  validate_interval: 5000
  save_checkpoint_interval: 5000
  keep_nbest_models: 100000
  avg_nbest_model: 5
  use_bf16: false
  save_init_model: false
  loss_rescale_by_rank: false
  use_deepspeed: true
  # Relative path to the DeepSpeed JSON config.
  deepspeed_config: decode_conf/ds_stage0_fp32.json

# ---- Optimizer and learning-rate schedule ----
optim: adamw
optim_conf:
  lr: 8.0e-05
scheduler: warmuplr
scheduler_conf:
  warmup_steps: 2000

# ---- Dataset and data loading ----
dataset: FunCineForgeDataset
dataset_conf:
  use_emotion_clue: true

  # Codec token IDs: the special IDs below continue directly after
  # codebook_size (6561..6564).
  codebook_size: 6561
  sos: 6561
  eos: 6562
  turn_of_speech: 6563
  fill_token: 6564
  ignore_id: -100

  # NOTE(review): presumably IDs in the text tokenizer's vocabulary that
  # delimit the clue segment — confirm.
  startofclue_token: 151646
  endofclue_token: 151647

  frame_shift: 25

  # Time/speaker token IDs: the attribute IDs below continue directly after
  # timebook_size (1500..1510), and speaker IDs start at 1511.
  timebook_size: 1500
  # NOTE(review): the next four keys look like pinyin scene types (narration,
  # monologue, dialogue, multi-speaker) — confirm.
  pangbai: 1500
  dubai: 1501
  duihua: 1502
  duoren: 1503
  male: 1504
  female: 1505
  child: 1506
  youth: 1507
  adult: 1508
  middle: 1509
  elderly: 1510
  speaker_id_start: 1511

  index_ds: CosyVoice
  dataloader: DataloaderMapStyle
  # Comma-separated list kept as a single string.
  load_meta_data_key: text,clue,token,face,dialogue
  data_split_num: 1

  # Batching.
  batch_sampler: BatchSampler
  shuffle: true
  sort_size: 512
  # Same value as model_conf.face_size; keep the two in sync.
  face_size: 512
  batch_type: token
  batch_size: 3000
  batch_size_token_max: 20000
  batch_size_sample_max: 100
  max_token_length: 5000
  max_text_length: 300
  batch_size_scale_threshold: 3000
  num_workers: 20
  retry: 100

# ---- Augmentation ----
specaug: FunCineForgeSpecAug
specaug_conf:
  # Only time masking is enabled.
  apply_time_warp: false
  apply_freq_mask: false
  apply_time_mask: true
  time_mask_width_ratio_range:
    - 0
    - 0.05
  num_time_mask: 10
  fill_value: -100

# ---- Tokenizer ----
tokenizer: FunCineForgeTokenizer
tokenizer_conf:
  # Interpolation: reuses llm_conf.init_param_path.
  init_param_path: ${llm_conf.init_param_path}

# ---- Face encoder ----
face_encoder: FaceRecIR101
face_encoder_conf:
  init_param_path: ../speaker_diarization/pretrained_models/face_recog_ir101.onnx

# ---- Runtime, data lists and outputs ----
enable_tf32: true
debug: false
# The four paths below are absolute and specific to the original
# environment; adjust them before reusing this file elsewhere.
train_data_set_list: /nfs/yanzhang.ljx/workspace/datasets/YingShi/clean/train.jsonl
valid_data_set_list: /nfs/yanzhang.ljx/workspace/datasets/YingShi/clean/test.jsonl
output_dir: /cpfs_fundata/yanzhang.ljx/workspace/exps/1m-8gpu/zh_en
init_param: /nfs/hengwu.zty/exps/4m-8gpu/CosyVoice_MixedAM_5b15_Qwen2_500M_phn_fp32_fsq6561_simple_sys_minmo_l12_merge_cosyvoice3d5_baiyinku_emilia_yodas2_0605/ds-model.pt.ep0.290000/mp_rank_00_model_states.pt
# NOTE(review): device is cpu while train_conf.use_deepspeed is true —
# confirm this is intended (it may be overridden at launch).
device: cpu