mazesmazes committed on
Commit
ec97639
·
verified ·
1 Parent(s): bcc6b04

Update custom model files, README, and requirements

Browse files
Files changed (2) hide show
  1. asr_config.py +1 -1
  2. asr_pipeline.py +6 -5
asr_config.py CHANGED
@@ -61,7 +61,7 @@ class ASRConfig(transformers.PretrainedConfig):
61
  generation_defaults = {
62
  "num_beams": 1,
63
  "max_new_tokens": 256,
64
- "min_new_tokens": 1,
65
  "repetition_penalty": 1.0,
66
  "length_penalty": 1.0,
67
  "no_repeat_ngram_size": 0,
 
61
  generation_defaults = {
62
  "num_beams": 1,
63
  "max_new_tokens": 256,
64
+ "min_new_tokens": 0,
65
  "repetition_penalty": 1.0,
66
  "length_penalty": 1.0,
67
  "no_repeat_ngram_size": 0,
asr_pipeline.py CHANGED
@@ -489,11 +489,11 @@ class ASRPipeline(transformers.AutomaticSpeechRecognitionPipeline):
489
  text = text.lower()
490
 
491
  # 2. REMOVE REPETITIVE LOOPS
492
- # If the model repeats the same phrase more than twice, cut it off.
493
  words = text.split()
494
  if len(words) > 10:
495
- # Check for repeating n-grams (1 to 4 words long)
496
- for n in range(1, 5):
497
  last_sequence = words[-n:]
498
  repeat_count = 0
499
  idx = len(words) - n
@@ -501,9 +501,10 @@ class ASRPipeline(transformers.AutomaticSpeechRecognitionPipeline):
501
  repeat_count += 1
502
  idx -= n
503
 
504
- # If more than 2 exact repetitions at the end, truncate
505
  if repeat_count > 2:
506
- text = " ".join(words[: idx + n])
 
507
  break
508
 
509
  # 3. STRIP WHITESPACE
 
489
  text = text.lower()
490
 
491
  # 2. REMOVE REPETITIVE LOOPS
492
+ # If the model repeats the same phrase more than twice, remove all repetitions.
493
  words = text.split()
494
  if len(words) > 10:
495
+ # Check for repeating n-grams (1 to 5 words long)
496
+ for n in range(1, 6):
497
  last_sequence = words[-n:]
498
  repeat_count = 0
499
  idx = len(words) - n
 
501
  repeat_count += 1
502
  idx -= n
503
 
504
+ # If more than 2 exact repetitions at the end, remove all of them
505
  if repeat_count > 2:
506
+ words = words[:idx]
507
+ text = " ".join(words)
508
  break
509
 
510
  # 3. STRIP WHITESPACE