mazesmazes committed on
Commit
ff71b8e
·
verified ·
1 Parent(s): 47b112b

Training in progress - step 1500

Browse files
Files changed (2) hide show
  1. asr_config.py +1 -1
  2. asr_pipeline.py +1 -4
asr_config.py CHANGED
@@ -71,7 +71,7 @@ class ASRConfig(transformers.PretrainedConfig):
71
  "min_new_tokens": 0,
72
  "repetition_penalty": 1.0,
73
  "length_penalty": 1.0,
74
- "no_repeat_ngram_size": 3, # Prevent repeating 3-grams like "so so so"
75
  "use_cache": True,
76
  }
77
 
 
71
  "min_new_tokens": 0,
72
  "repetition_penalty": 1.0,
73
  "length_penalty": 1.0,
74
+ "no_repeat_ngram_size": 0,
75
  "use_cache": True,
76
  }
77
 
asr_pipeline.py CHANGED
@@ -486,7 +486,6 @@ class ASRPipeline(transformers.AutomaticSpeechRecognitionPipeline):
486
  return ""
487
 
488
  original_len = len(text.split())
489
- original_text = text # Keep for debug
490
 
491
  # 1. LOWERCASE
492
  text = text.lower()
@@ -506,10 +505,8 @@ class ASRPipeline(transformers.AutomaticSpeechRecognitionPipeline):
506
  words = words[: idx + n]
507
  text = " ".join(words)
508
  print(
509
- f"[POSTPROCESS] Truncated repetition: {original_len} -> {len(words)} words (n={n}, repeats={repeat_count})"
510
  )
511
- print(f"[POSTPROCESS] Before: {original_text[:100]}...")
512
- print(f"[POSTPROCESS] After: {text[:100]}...")
513
  break
514
 
515
  # 3. COMBINE ACRONYMS
 
486
  return ""
487
 
488
  original_len = len(text.split())
 
489
 
490
  # 1. LOWERCASE
491
  text = text.lower()
 
505
  words = words[: idx + n]
506
  text = " ".join(words)
507
  print(
508
+ f"[DEBUG] Truncated repetition: {original_len} -> {len(words)} words (n={n}, repeats={repeat_count})"
509
  )
 
 
510
  break
511
 
512
  # 3. COMBINE ACRONYMS