Training in progress - step 1500
Browse files
- asr_config.py +1 -1
- asr_pipeline.py +1 -4
asr_config.py
CHANGED
|
@@ -71,7 +71,7 @@ class ASRConfig(transformers.PretrainedConfig):
|
|
| 71 |
"min_new_tokens": 0,
|
| 72 |
"repetition_penalty": 1.0,
|
| 73 |
"length_penalty": 1.0,
|
| 74 |
-
"no_repeat_ngram_size":
|
| 75 |
"use_cache": True,
|
| 76 |
}
|
| 77 |
|
|
|
|
| 71 |
"min_new_tokens": 0,
|
| 72 |
"repetition_penalty": 1.0,
|
| 73 |
"length_penalty": 1.0,
|
| 74 |
+
"no_repeat_ngram_size": 0,
|
| 75 |
"use_cache": True,
|
| 76 |
}
|
| 77 |
|
asr_pipeline.py
CHANGED
|
@@ -486,7 +486,6 @@ class ASRPipeline(transformers.AutomaticSpeechRecognitionPipeline):
|
|
| 486 |
return ""
|
| 487 |
|
| 488 |
original_len = len(text.split())
|
| 489 |
-
original_text = text # Keep for debug
|
| 490 |
|
| 491 |
# 1. LOWERCASE
|
| 492 |
text = text.lower()
|
|
@@ -506,10 +505,8 @@ class ASRPipeline(transformers.AutomaticSpeechRecognitionPipeline):
|
|
| 506 |
words = words[: idx + n]
|
| 507 |
text = " ".join(words)
|
| 508 |
print(
|
| 509 |
-
f"[
|
| 510 |
)
|
| 511 |
-
print(f"[POSTPROCESS] Before: {original_text[:100]}...")
|
| 512 |
-
print(f"[POSTPROCESS] After: {text[:100]}...")
|
| 513 |
break
|
| 514 |
|
| 515 |
# 3. COMBINE ACRONYMS
|
|
|
|
| 486 |
return ""
|
| 487 |
|
| 488 |
original_len = len(text.split())
|
|
|
|
| 489 |
|
| 490 |
# 1. LOWERCASE
|
| 491 |
text = text.lower()
|
|
|
|
| 505 |
words = words[: idx + n]
|
| 506 |
text = " ".join(words)
|
| 507 |
print(
|
| 508 |
+
f"[DEBUG] Truncated repetition: {original_len} -> {len(words)} words (n={n}, repeats={repeat_count})"
|
| 509 |
)
|
|
|
|
|
|
|
| 510 |
break
|
| 511 |
|
| 512 |
# 3. COMBINE ACRONYMS
|