Spaces:
Running on Zero
| model: FunCineForgeLM | |
| model_conf: | |
| lsm_weight: 0.0 | |
| length_normalized_loss: true | |
| codec_unit: 6761 | |
| timespk_unit: 1550 | |
| face_size: 512 | |
| llm: Qwen2-0.5B | |
| llm_conf: | |
| hub: hf | |
| freeze: false | |
| llm_dtype: fp32 | |
| init_param_path: pretrained_models/Qwen2-0.5B-CosyVoice-BlankEN | |
| use_lora: false | |
| lora_conf: | |
| task_type: CAUSAL_LM | |
| r: 16 | |
| lora_alpha: 32 | |
| lora_dropout: 0.05 | |
| bias: none | |
| target_modules: | |
| - q_proj | |
| - v_proj | |
| train_conf: | |
| use_lora: ${llm_conf.use_lora} | |
| accum_grad: 1 | |
| grad_clip: 5 | |
| max_epoch: 200 | |
| log_interval: 100 | |
| effective_save_name_excludes: | |
| - none | |
| resume: true | |
| validate_interval: 5000 | |
| save_checkpoint_interval: 5000 | |
| keep_nbest_models: 100000 | |
| avg_nbest_model: 5 | |
| use_bf16: false | |
| save_init_model: false | |
| loss_rescale_by_rank: false | |
| use_deepspeed: true | |
| deepspeed_config: decode_conf/ds_stage0_fp32.json | |
| optim: adamw | |
| optim_conf: | |
| lr: 8.0e-05 | |
| scheduler: warmuplr | |
| scheduler_conf: | |
| warmup_steps: 2000 | |
| dataset: FunCineForgeDataset | |
| dataset_conf: | |
| use_emotion_clue: true | |
| codebook_size: 6561 | |
| sos: 6561 | |
| eos: 6562 | |
| turn_of_speech: 6563 | |
| fill_token: 6564 | |
| ignore_id: -100 | |
| startofclue_token: 151646 | |
| endofclue_token: 151647 | |
| frame_shift: 25 | |
| timebook_size: 1500 | |
| pangbai: 1500 | |
| dubai: 1501 | |
| duihua: 1502 | |
| duoren: 1503 | |
| male: 1504 | |
| female: 1505 | |
| child: 1506 | |
| youth: 1507 | |
| adult: 1508 | |
| middle: 1509 | |
| elderly: 1510 | |
| speaker_id_start: 1511 | |
| index_ds: CosyVoice | |
| dataloader: DataloaderMapStyle | |
| load_meta_data_key: text,clue,token,face,dialogue | |
| data_split_num: 1 | |
| batch_sampler: BatchSampler | |
| shuffle: true | |
| sort_size: 512 | |
| face_size: 512 | |
| batch_type: token | |
| batch_size: 3000 | |
| batch_size_token_max: 20000 | |
| batch_size_sample_max: 100 | |
| max_token_length: 5000 | |
| max_text_length: 300 | |
| batch_size_scale_threshold: 3000 | |
| num_workers: 20 | |
| retry: 100 | |
| specaug: FunCineForgeSpecAug | |
| specaug_conf: | |
| apply_time_warp: false | |
| apply_freq_mask: false | |
| apply_time_mask: true | |
| time_mask_width_ratio_range: | |
| - 0 | |
| - 0.05 | |
| num_time_mask: 10 | |
| fill_value: -100 | |
| tokenizer: FunCineForgeTokenizer | |
| tokenizer_conf: | |
| init_param_path: ${llm_conf.init_param_path} | |
| face_encoder: FaceRecIR101 | |
| face_encoder_conf: | |
| init_param_path: pretrained_models/face_recog_ir101.onnx | |
| enable_tf32: true | |
| debug: false | |
| device: cpu | |