alternative model version

Files changed (7)

.gitattributes +1 -0
model.pth +2 -2
old_model/model.pth +3 -0
old_model/train.yaml +0 -0
old_model/train_config.yaml +101 -0
train.yaml +3 -2
train_config.yaml +1 -0

.gitattributes CHANGED Viewed

@@ -35,3 +35,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
 model.pth filter=lfs diff=lfs merge=lfs -text
 bpe.model filter=lfs diff=lfs merge=lfs -text

 *tfevents* filter=lfs diff=lfs merge=lfs -text
 model.pth filter=lfs diff=lfs merge=lfs -text
 bpe.model filter=lfs diff=lfs merge=lfs -text
+old_model/model.pth filter=lfs diff=lfs merge=lfs -text

model.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8e3a313e405292897dbe70875d084c7facce1f36e924e7b6793b930bb3a3010e
-size 723062244

 version https://git-lfs.github.com/spec/v1
+oid sha256:3fd2f27c419742d03f205dc5d38364ab5c9c892f4b0bfdfe510244ce92073dea
+size 723052874

old_model/model.pth ADDED Viewed

@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8e3a313e405292897dbe70875d084c7facce1f36e924e7b6793b930bb3a3010e
+size 723062244

old_model/train.yaml ADDED Viewed

The diff for this file is too large to render. See raw diff

old_model/train_config.yaml ADDED Viewed

@@ -0,0 +1,101 @@

+batch_size: 16
+accum_grad: 128
+max_epoch: 150
+patience: none
+# The initialization method for model parameters
+init: xavier_uniform
+best_model_criterion:
+-   - valid
+    - acc_asr  #
+    - max
+keep_nbest_models: 10
+encoder: conformer
+encoder_conf:
+    output_size: 512
+    attention_heads: 8
+    linear_units: 2048
+    num_blocks: 12
+    dropout_rate: 0.1
+    positional_dropout_rate: 0.1
+    attention_dropout_rate: 0.1
+    input_layer: conv2d
+    normalize_before: true
+    macaron_style: true
+    rel_pos_type: latest
+    pos_enc_layer_type: rel_pos
+    selfattention_layer_type: rel_selfattn
+    activation_type: swish
+    use_cnn_module: true
+    cnn_module_kernel: 31
+    interctc_layer_idx: [6]
+decoder: multi_transformer  #mlm
+decoder_conf:
+    attention_heads: 8
+    linear_units: 2048
+    num_blocks: 6
+    dropout_rate: 0.1
+    positional_dropout_rate: 0.1
+    self_attention_dropout_rate: 0.1
+    src_attention_dropout_rate: 0.1
+    return_hidden: true
+subtitle_encoder: transformer
+subtitle_encoder_conf:
+    output_size: 512
+    attention_heads: 8
+    linear_units: 2048
+    num_blocks: 6
+    dropout_rate: 0.1
+    positional_dropout_rate: 0.1
+    attention_dropout_rate: 0.1
+    input_layer: none
+    normalize_before: true
+    macaron_style: true
+subtitle_decoder: multi_transformer
+subtitle_decoder_conf:
+    attention_heads: 8
+    linear_units: 2048
+    num_blocks: 6
+    dropout_rate: 0.1
+    positional_dropout_rate: 0.1
+    self_attention_dropout_rate: 0.1
+    src_attention_dropout_rate: 0.1
+model_conf:
+    asr_weight: 0.5
+    subs_weight: 0.5
+    ctc_weight: 0.3  # = CTC weight
+    interctc_weight: 0.3
+    lsm_weight_asr: 0.1
+    lsm_weight_mt: 0.1
+    length_normalized_loss: false
+    condition_subtitle_decoder: true
+    condition_asr_decoder: true
+    use_asr_feats: "encoder"
+optim: adam
+optim_conf:
+    lr: 0.003
+scheduler: warmuplr
+scheduler_conf:
+    warmup_steps: 100000
+specaug: specaug
+specaug_conf:
+    apply_time_warp: true
+    time_warp_window: 5
+    time_warp_mode: bicubic
+    apply_freq_mask: true
+    freq_mask_width_range:
+    - 0
+    - 30
+    num_freq_mask: 2
+    apply_time_mask: true
+    time_mask_width_range:
+    - 0
+    - 40
+    num_time_mask: 2

train.yaml CHANGED Viewed

@@ -4,13 +4,13 @@ log_level: INFO
 dry_run: false
 iterator_type: sequence
 batch_asr_ratio: null
-output_dir: /esat/spchtemp/scratch/jponcele/espnet2/exp/exp-hpc/st_train_subtitling_chained_PL_C10_new_combined_nelf_all_dim512_iter10k
 ngpu: 1
 seed: 0
 num_workers: 1
 num_att_plot: 0
 dist_backend: nccl
-dist_init_method: file:///dodrio/scratch/projects/starting_2023_035/experiments/espnet2/exp/exp-new-combined/st_train_subtitling_chained_PL_C8_new_combined_nelf_all_dim512_iter10k/.dist_init_eba45741-b85b-43d0-b471-bda370ed01d6
 dist_world_size: 16
 dist_rank: 0
 local_rank: 0
@@ -10145,6 +10145,7 @@ model_conf:
     condition_subtitle_decoder: true
     condition_asr_decoder: true
     use_asr_feats: encoder
 use_preprocessor: true
 token_type: bpe
 src_token_type: bpe

 dry_run: false
 iterator_type: sequence
 batch_asr_ratio: null
+output_dir: /esat/spchtemp/scratch/jponcele/espnet2/exp/exp-hpc/st_train_subtitling_chained_PL_C10_new_combined_nelf_all_dim512_fix_conditioning_iter10k
 ngpu: 1
 seed: 0
 num_workers: 1
 num_att_plot: 0
 dist_backend: nccl
+dist_init_method: file:///dodrio/scratch/projects/starting_2023_035/experiments/espnet2/exp/exp-new-combined/st_train_subtitling_chained_PL_C8_new_combined_nelf_all_dim512_fix_conditioning_iter10k/.dist_init_8e4c5c43-7800-495a-a33a-312f06960745
 dist_world_size: 16
 dist_rank: 0
 local_rank: 0
     condition_subtitle_decoder: true
     condition_asr_decoder: true
     use_asr_feats: encoder
+    fix_conditioning: true
 use_preprocessor: true
 token_type: bpe
 src_token_type: bpe

train_config.yaml CHANGED Viewed

@@ -74,6 +74,7 @@ model_conf:
     condition_subtitle_decoder: true
     condition_asr_decoder: true
     use_asr_feats: "encoder"
 optim: adam
 optim_conf:

     condition_subtitle_decoder: true
     condition_asr_decoder: true
     use_asr_feats: "encoder"
+    fix_conditioning: true
 optim: adam
 optim_conf: