mazesmazes/tiny-audio

@@ -212,8 +212,18 @@ class ASRModel(PreTrainedModel, GenerationMixin):
                         **cache_kwargs,
                     )
                 else:
-                    # No saved adapters - initialize fresh LoRA for training
-                    model._setup_lora(config)
             return model
         finally:
@@ -382,7 +392,6 @@ class ASRModel(PreTrainedModel, GenerationMixin):
             task_type="CAUSAL_LM",
         )
         self.language_model = get_peft_model(self.language_model, lora_config)
-        # LoRA params are trainable by default, base model stays frozen
     def _init_tokenizer(self, config: ASRConfig):
         """Initialize tokenizer with audio token."""

                         **cache_kwargs,
                     )
                 else:
+                    # No saved adapters - initialize fresh LLM LoRA for training
+                    from peft import LoraConfig, get_peft_model
+                    lora_config = LoraConfig(
+                        r=config.lora_rank,
+                        lora_alpha=config.lora_alpha,
+                        target_modules=config.lora_target_modules,
+                        lora_dropout=config.lora_dropout,
+                        bias="none",
+                        task_type="CAUSAL_LM",
+                    )
+                    model.language_model = get_peft_model(model.language_model, lora_config)
             return model
         finally:
             task_type="CAUSAL_LM",
         )
         self.language_model = get_peft_model(self.language_model, lora_config)
     def _init_tokenizer(self, config: ASRConfig):
         """Initialize tokenizer with audio token."""

asr_pipeline.py CHANGED Viewed

@@ -485,40 +485,16 @@ class ASRPipeline(transformers.AutomaticSpeechRecognitionPipeline):
         if not text:
             return ""
-        original_len = len(text.split())
-        original_text = text  # Keep for debug
         # 1. LOWERCASE
         text = text.lower()
-        # 2. REMOVE REPETITIVE LOOPS
-        # If the model repeats the same phrase, keep only one instance.
-        words = text.split()
-        for n in range(1, min(15, len(words) // 2 + 1)):
-            last_sequence = words[-n:]
-            repeat_count = 0
-            idx = len(words) - n
-            while idx >= n and words[idx - n : idx] == last_sequence:
-                repeat_count += 1
-                idx -= n
-            if repeat_count >= 1:
-                words = words[: idx + n]
-                text = " ".join(words)
-                print(
-                    f"[POSTPROCESS] Truncated repetition: {original_len} -> {len(words)} words (n={n}, repeats={repeat_count})"
-                )
-                print(f"[POSTPROCESS] Before: {original_text[:100]}...")
-                print(f"[POSTPROCESS] After: {text[:100]}...")
-                break
-        # 3. COMBINE ACRONYMS
         # Merge consecutive single letters into one word (e.g., "u s a" -> "usa")
         text = re.sub(r"\b([a-z])((?:\s+[a-z])+)\b", lambda m: m.group(0).replace(" ", ""), text)
-        # 4. NORMALIZE CURRENCY
         # Convert "eur X" to "X euros" for Whisper normalizer compatibility
         text = re.sub(r"\beur\s+(\d+)", r"\1 euros", text)
-        # 5. STRIP WHITESPACE
         return re.sub(r"\s+", " ", text).strip()

         if not text:
             return ""
         # 1. LOWERCASE
         text = text.lower()
+        # 2. COMBINE ACRONYMS
         # Merge consecutive single letters into one word (e.g., "u s a" -> "usa")
         text = re.sub(r"\b([a-z])((?:\s+[a-z])+)\b", lambda m: m.group(0).replace(" ", ""), text)
+        # 3. NORMALIZE CURRENCY
         # Convert "eur X" to "X euros" for Whisper normalizer compatibility
         text = re.sub(r"\beur\s+(\d+)", r"\1 euros", text)
+        # 4. STRIP WHITESPACE
         return re.sub(r"\s+", " ", text).strip()