rosyvs committed (verified)
Commit 9c7b12b · 1 Parent(s): e2c66bc

upload yaml used for training

Files changed (1)
  1. publicKS_ig_hf_base.yaml +52 -0
publicKS_ig_hf_base.yaml ADDED
@@ -0,0 +1,52 @@
+ # parameters to set
+
+ model_cfg:
+   init_from_hub_path: openai/whisper-base # openai/whisper-large-v2
+   # lang: None
+   # apply_spec_augment: True
+   # mask_time_prob: 0.05
+   # mask_feature_prob: 0.05
+   # mask_time_length: 40
+   # mask_feature_length: 30
+   # mask_time_min_masks: 2
+   # mask_feature_min_masks: 2
+
+ data_cfg:
+   data_root: /home/rosy/corpora/
+   train_manif: /home/rosy/corpora/data_manifests/ASR/PUBLIC_KIDS_TRAIN_v4_deduped.csv
+   val_manif: /home/rosy/corpora/data_manifests/ASR/ISAT-SI_stratified_DEV_nonempty_frac1of4.csv
+   test_manif: /home/rosy/corpora/data_manifests/ASR/ISAT-SI_stratified_TEST_nonempty.csv
+
+ experiment_cfg:
+   OUT_DIR: train/whisat/save/publicKS_base
+   use_lora: False
+   use_int8: False
+
+ train_cfg:
+   training_args:
+     output_dir: !ref <experiment_cfg[OUT_DIR]>
+     per_device_train_batch_size: 16
+     # gradient_accumulation_steps: 8 # 1 # increase by 2x for every 2x decrease in batch size
+     learning_rate: 0.000003 # 1e-5 orig, 1e-3 lora
+     warmup_steps: 50 # 500 orig 50 lora
+     num_train_epochs: 1
+     # gradient_checkpointing: True # TODO: has to be false for LoRA??
+     fp16: True # True
+     evaluation_strategy: steps # or epochs
+     per_device_eval_batch_size: 4
+     predict_with_generate: True
+     generation_max_length: 112
+     save_steps: 500
+     eval_steps: 500
+     eval_accumulation_steps: 2
+     logging_steps: 25
+     report_to:
+       - tensorboard
+     load_best_model_at_end: False
+     metric_for_best_model: wer
+     greater_is_better: False
+     push_to_hub: False
+     remove_unused_columns: False # required as the PeftModel forward doesn't have the signature of the wrapped model's forward
+     label_names:
+       - labels
+