macbert4mdcspell_v3 / csc.config
Macropodus's picture
Upload 11 files
8b6a251 verified
{
"pretrained_model_name_or_path": "hfl/chinese-macbert-base",
"path_relm": "",
"path_train": "train.v15.train.json.zh_punct",
"path_dev": "dev.v10.dev.json.zh_punct",
"path_tet": "test.json.zh_punct",
"model_save_path": "../output/csc_merge_9_public_of_mdcspell_add_v10_extend_2000w_pinyin_init_v5_bs32_lr2e5_epoch7_recall",
"task_name": "csc_merge_9_public_of_mdcspell_add_v10_extend_2000w_pinyin_init_v5_bs32_lr2e5_epoch7_recall",
"use_fast_tokenizer": true,
"do_lower_case": true,
"do_train": true,
"do_eval": true,
"do_test": true,
"gradient_accumulation_steps": 4,
"warmup_proportion": null,
"num_warmup_steps": 128,
"max_train_steps": 1101576,
"num_train_epochs": 7,
"train_batch_size": 32,
"eval_batch_size": 32,
"learning_rate": 3e-05,
"max_seq_length": 128,
"max_grad_norm": 1.0,
"weight_decay": 0.0005,
"save_steps": 10000,
"anchor": null,
"seed": 42,
"lr_scheduler_type": "cosine",
"loss_type": "focal_loss",
"mask_mode": "noerror",
"loss_det_rate": 0.15,
"prompt_length": 0,
"mask_rate": 0.15,
"threshold": 0.5,
"flag_dynamic_encode": false,
"flag_loss_period": false,
"flag_cpo_loss": false,
"flag_pin_memory": true,
"flag_train": false,
"flag_fp16": false,
"flag_cuda": true,
"flag_skip": true,
"flag_mft": true,
"num_workers": 4,
"CUDA_VISIBLE_DEVICES": "0",
"USE_TORCH": "1"
}