# Parameters to set for fine-tuning openai/whisper-large-v2 with LoRA + int8.
#
# NOTE(review): the `!ref <...>` tag used below is not core YAML; this assumes
# the file is read by a loader that resolves it (e.g. HyperPyYAML) -- confirm.
# NOTE(review): `~` in the paths below is not expanded by YAML itself;
# presumably the consumer expands it -- verify against the loading code.

model_cfg:
  init_from_hub_path: openai/whisper-large-v2
  # Optional overrides, currently disabled.
  # NOTE(review): if `lang` is re-enabled, plain `None` parses as the string
  # "None" in YAML (not null); use `null` unless the consumer expects that
  # string.
  # lang: None
  # apply_spec_augment: True
  # mask_time_prob: 0.05
  # mask_feature_prob: 0.05
  # mask_time_length: 40
  # mask_feature_length: 30
  # mask_time_min_masks: 2
  # mask_feature_min_masks: 2

data_cfg:
  data_root: ~/corpora/
  train_manif: ~/corpora/data_manifests/ASR/PUBLIC_KIDS_TRAIN_v4_deduped.csv
  # Small private dataset of classroom speech; only affects training if
  # load_best_model_at_end is True. Left unset here -- supply a manifest path.
  val_manif: null
  # Small private dataset of classroom speech; doesn't affect training.
  # Left unset here -- supply a manifest path.
  test_manif: null

experiment_cfg:
  OUT_DIR: train/whisat/save/publicKS_LoRA_int8
  use_lora: True
  use_int8: True

train_cfg:
  # NOTE(review): these keys look like Hugging Face Seq2SeqTrainingArguments
  # fields and are presumably passed through unchanged -- confirm.
  training_args:
    output_dir: !ref <experiment_cfg[OUT_DIR]>
    per_device_train_batch_size: 32  # 64
    # Author's note: 1e-5 orig, 1e-3 lora.
    # NOTE(review): the value below is 1e-4, which matches neither figure in
    # the note above -- confirm which is intended.
    learning_rate: 0.0001
    warmup_steps: 50  # 500 orig, 50 lora
    num_train_epochs: 1
    fp16: True
    evaluation_strategy: steps  # or epoch
    per_device_eval_batch_size: 4
    predict_with_generate: True
    generation_max_length: 112
    save_steps: 500
    eval_steps: 500
    eval_accumulation_steps: 2
    logging_steps: 25
    report_to:
      - tensorboard
    load_best_model_at_end: False
    metric_for_best_model: wer
    greater_is_better: False  # lower WER is better
    push_to_hub: False
    # Required as the PeftModel forward doesn't have the signature of the
    # wrapped model's forward.
    remove_unused_columns: False
    label_names:
      - labels