upload yaml used for training
Browse files- publicKS_ig_hf_base.yaml +52 -0
publicKS_ig_hf_base.yaml
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
---
# Training configuration for fine-tuning a Whisper model (HF Seq2SeqTrainer)
# on the PUBLIC_KIDS corpus, evaluated on ISAT-SI dev/test splits.
# NOTE(review): the `!ref <...>` tag below implies this file is loaded with
# HyperPyYAML (SpeechBrain-style), not plain yaml.safe_load — confirm loader.

# parameters to set

model_cfg:
  # Hub checkpoint to initialize from; swap for large-v2 for the bigger model.
  init_from_hub_path: openai/whisper-base  # openai/whisper-large-v2
  # Optional SpecAugment settings — currently disabled (commented out).
  # lang: None
  # apply_spec_augment: true
  # mask_time_prob: 0.05
  # mask_feature_prob: 0.05
  # mask_time_length: 40
  # mask_feature_length: 30
  # mask_time_min_masks: 2
  # mask_feature_min_masks: 2

data_cfg:
  # Root directory that audio paths in the manifests are relative to.
  data_root: /home/rosy/corpora/
  train_manif: /home/rosy/corpora/data_manifests/ASR/PUBLIC_KIDS_TRAIN_v4_deduped.csv
  val_manif: /home/rosy/corpora/data_manifests/ASR/ISAT-SI_stratified_DEV_nonempty_frac1of4.csv
  test_manif: /home/rosy/corpora/data_manifests/ASR/ISAT-SI_stratified_TEST_nonempty.csv

experiment_cfg:
  OUT_DIR: train/whisat/save/publicKS_base
  # Full fine-tuning: no LoRA adapters, no int8 quantization.
  use_lora: false
  use_int8: false

train_cfg:
  # Passed through to transformers Seq2SeqTrainingArguments.
  training_args:
    output_dir: !ref <experiment_cfg[OUT_DIR]>
    per_device_train_batch_size: 16
    # gradient_accumulation_steps: 8  # 1; increase by 2x for every 2x decrease in batch size
    learning_rate: 0.000003  # 1e-5 orig, 1e-3 lora
    warmup_steps: 50  # 500 orig, 50 lora
    num_train_epochs: 1
    # gradient_checkpointing: true  # TODO: has to be false for LoRA??
    fp16: true
    evaluation_strategy: steps  # or epochs
    per_device_eval_batch_size: 4
    predict_with_generate: true
    generation_max_length: 112
    save_steps: 500
    eval_steps: 500
    eval_accumulation_steps: 2
    logging_steps: 25
    report_to:
      - tensorboard
    load_best_model_at_end: false
    # WER: lower is better.
    metric_for_best_model: wer
    greater_is_better: false
    push_to_hub: false
    remove_unused_columns: false  # required as the PeftModel forward doesn't have the signature of the wrapped model's forward
    label_names:
      - labels