upload yaml used for training
Browse files- publicKS_ig_hf_base.yaml +52 -0
publicKS_ig_hf_base.yaml
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
---
# Training configuration for fine-tuning a Whisper model (HF Seq2SeqTrainer)
# on the PUBLIC_KIDS corpus, evaluated on ISAT-SI dev/test splits.
# NOTE(review): the `!ref <...>` tag below implies this file is loaded with
# HyperPyYAML (SpeechBrain-style), not plain yaml.safe_load — confirm loader.

# parameters to set

model_cfg:
  # Hub checkpoint to initialize from; swap for large-v2 for the bigger model.
  init_from_hub_path: openai/whisper-base  # openai/whisper-large-v2
  # Optional SpecAugment settings — currently disabled (commented out).
  # lang: None
  # apply_spec_augment: true
  # mask_time_prob: 0.05
  # mask_feature_prob: 0.05
  # mask_time_length: 40
  # mask_feature_length: 30
  # mask_time_min_masks: 2
  # mask_feature_min_masks: 2

data_cfg:
  # Root directory that audio paths in the manifests are relative to.
  data_root: /home/rosy/corpora/
  train_manif: /home/rosy/corpora/data_manifests/ASR/PUBLIC_KIDS_TRAIN_v4_deduped.csv
  val_manif: /home/rosy/corpora/data_manifests/ASR/ISAT-SI_stratified_DEV_nonempty_frac1of4.csv
  test_manif: /home/rosy/corpora/data_manifests/ASR/ISAT-SI_stratified_TEST_nonempty.csv

experiment_cfg:
  OUT_DIR: train/whisat/save/publicKS_base
  # Full fine-tuning: no LoRA adapters, no int8 quantization.
  use_lora: false
  use_int8: false

train_cfg:
  # Passed through to transformers Seq2SeqTrainingArguments.
  training_args:
    output_dir: !ref <experiment_cfg[OUT_DIR]>
    per_device_train_batch_size: 16
    # gradient_accumulation_steps: 8  # 1; increase by 2x for every 2x decrease in batch size
    learning_rate: 0.000003  # 1e-5 orig, 1e-3 lora
    warmup_steps: 50  # 500 orig, 50 lora
    num_train_epochs: 1
    # gradient_checkpointing: true  # TODO: has to be false for LoRA??
    fp16: true
    evaluation_strategy: steps  # or epochs
    per_device_eval_batch_size: 4
    predict_with_generate: true
    generation_max_length: 112
    save_steps: 500
    eval_steps: 500
    eval_accumulation_steps: 2
    logging_steps: 25
    report_to:
      - tensorboard
    load_best_model_at_end: false
    # WER: lower is better.
    metric_for_best_model: wer
    greater_is_better: false
    push_to_hub: false
    remove_unused_columns: false  # required as the PeftModel forward doesn't have the signature of the wrapped model's forward
    label_names:
      - labels