flexthink
/

soundchoice-g2p

@@ -46,6 +46,7 @@ lexicon_batch_size: 1024
 sentence_batch_size: 32
 homograph_batch_size: 32
 ctc_weight: 0.5
 homograph_loss_weight: 2.0
 lr: 0.002
 save_for_pretrained: true
@@ -97,7 +98,6 @@ lm_layers: 2 # number of hidden layers
 lm_output_neurons: 43
 # Beam Searcher
-use_language_model: false
 beam_search_min_decode_ratio: 0
 beam_search_max_decode_ratio: 1.0
 beam_search_beam_size: 16
@@ -268,7 +268,7 @@ modules:
   lin: *id010
   ctc_lin: *id013
   out: *id011
-  word_emb:
   word_emb_enc: *id012
 model: *id014
 lm_model: &id015 !new:speechbrain.lobes.models.RNNLM.RNNLM
@@ -281,61 +281,48 @@ lm_model: &id015 !new:speechbrain.lobes.models.RNNLM.RNNLM
 opt_class: !name:torch.optim.Adam
   lr: 0.002
 beam_searcher: &id029 !new:speechbrain.decoders.S2SRNNBeamSearcher
   embedding: *id008
   decoder: *id009
   linear: *id010
-  ctc_linear: *id013
-  bos_index: 0
-  eos_index: 1
-  blank_index: 2
   min_decode_ratio: 0
   max_decode_ratio: 1.0
   beam_size: 16
   eos_threshold: 10.0
   using_max_attn_shift: false
   max_attn_shift: 10
-  coverage_penalty: 5.0
-  ctc_weight: 0.4
 beam_searcher_valid: !new:speechbrain.decoders.S2SRNNBeamSearcher
   embedding: *id008
   decoder: *id009
   linear: *id010
-  ctc_linear: *id013
-  bos_index: 0
-  eos_index: 1
-  blank_index: 2
   min_decode_ratio: 0
   max_decode_ratio: 1.0
   beam_size: 16
   eos_threshold: 10.0
   using_max_attn_shift: false
   max_attn_shift: 10
-  coverage_penalty: 5.0
-  ctc_weight: 0.4
-beam_searcher_lm: !new:speechbrain.decoders.seq2seq.S2SRNNBeamSearchLM
-  embedding: *id008
-  decoder: *id009
-  linear: *id010
-  ctc_linear: *id013
-  language_model: *id015
-  bos_index: 0
-  eos_index: 1
-  blank_index: 2
-  min_decode_ratio: 0
-  max_decode_ratio: 1.0
-  beam_size: 16
-  eos_threshold: 10.0
-  using_max_attn_shift: false
-  max_attn_shift: 10
-  coverage_penalty: 5.0
-  ctc_weight: 0.4
-  lm_weight: 0.5
-  temperature: 1.25
-  temperature_lm: 1.0
 lr_annealing: &id018 !new:speechbrain.nnet.schedulers.NewBobScheduler
   initial_value: 0.002

 sentence_batch_size: 32
 homograph_batch_size: 32
 ctc_weight: 0.5  # also referenced as the `ctc` weight in scorer.weights
+ctc_window_size: 0  # forwarded to ctc_scorer as its ctc_window_size argument
 homograph_loss_weight: 2.0
 lr: 0.002
 save_for_pretrained: true
 lm_output_neurons: 43
 # Beam Searcher
 beam_search_min_decode_ratio: 0  # same value as the literal min_decode_ratio in beam_searcher / beam_searcher_valid
 beam_search_max_decode_ratio: 1.0  # same value as the literal max_decode_ratio in both beam searchers
 beam_search_beam_size: 16  # same value as the literal beam_size in both beam searchers
   lin: *id010
   ctc_lin: *id013
   out: *id011
+  word_emb: !ref <word_emb>
   word_emb_enc: *id012
 model: *id014
 lm_model: &id015 !new:speechbrain.lobes.models.RNNLM.RNNLM
 opt_class: !name:torch.optim.Adam  # optimizer entry pointing at torch.optim.Adam
   lr: 0.002  # same value as the top-level lr key
+ctc_scorer: !new:speechbrain.decoders.scorer.CTCScorer  # CTC scorer; takes over the ctc_linear / blank_index arguments formerly given inline to the beam searchers
+  eos_index: !ref <eos_index>  # NOTE(review): key defined outside this excerpt; the old inline value was 1 — confirm
+  blank_index: !ref <blank_index>  # NOTE(review): key defined outside this excerpt; the old inline value was 2 — confirm
+  ctc_fc: !ref <ctc_lin>  # NOTE(review): needs a top-level ctc_lin key; only modules.ctc_lin (*id013) is visible here — confirm
+  ctc_window_size: !ref <ctc_window_size>  # top-level ctc_window_size (0)
+coverage_scorer: !new:speechbrain.decoders.scorer.CoverageScorer  # coverage scorer; combined with ctc_scorer inside `scorer`
+  vocab_size: !ref <output_neurons>  # NOTE(review): output_neurons is defined outside this excerpt — confirm it exists at top level
+scorer: !new:speechbrain.decoders.scorer.ScorerBuilder  # bundles the scorers handed to both beam searchers via `scorer: !ref <scorer>`
+  full_scorers: [!ref <coverage_scorer>, !ref <ctc_scorer>]
+  weights:
+    coverage: !ref <beam_search_coverage_penalty>  # NOTE(review): replaces the old inline coverage_penalty: 5.0; key is defined outside this excerpt — confirm it holds 5.0
+    ctc: !ref <ctc_weight>  # NOTE(review): resolves to top-level ctc_weight (0.5 here); the old inline decoding value was 0.4 — confirm the change is intended
 beam_searcher: &id029 !new:speechbrain.decoders.S2SRNNBeamSearcher  # beam searcher instance, anchored as &id029
   embedding: *id008
   decoder: *id009
   linear: *id010
+  bos_index: !ref <bos_index>  # NOTE(review): key defined outside this excerpt; the old inline value was 0 — confirm
+  eos_index: !ref <eos_index>  # NOTE(review): key defined outside this excerpt; the old inline value was 1 — confirm
   min_decode_ratio: 0
   max_decode_ratio: 1.0
   beam_size: 16
   eos_threshold: 10.0
   using_max_attn_shift: false
   max_attn_shift: 10
+  scorer: !ref <scorer>  # CTC and coverage weighting now come from the shared `scorer` entry instead of inline arguments
 beam_searcher_valid: !new:speechbrain.decoders.S2SRNNBeamSearcher  # second beam searcher; its arguments here are identical to beam_searcher
   embedding: *id008
   decoder: *id009
   linear: *id010
+  bos_index: !ref <bos_index>  # NOTE(review): key defined outside this excerpt; the old inline value was 0 — confirm
+  eos_index: !ref <eos_index>  # NOTE(review): key defined outside this excerpt; the old inline value was 1 — confirm
   min_decode_ratio: 0
   max_decode_ratio: 1.0
   beam_size: 16
   eos_threshold: 10.0
   using_max_attn_shift: false
   max_attn_shift: 10
+  scorer: !ref <scorer>  # shares the same `scorer` entry as beam_searcher
 lr_annealing: &id018 !new:speechbrain.nnet.schedulers.NewBobScheduler
   initial_value: 0.002