mazesmazes
/

tiny-audio

Automatic Speech Recognition

feature-extraction

speech-recognition

Model card | Files and versions

mazesmazes committed 14 days ago

Commit

030c3cb

·

verified ·

1 Parent(s): a0db2d6

Training in progress - step 500

Files changed (3) hide show

asr_modeling.py +1 -4
config.json +6 -3
model.safetensors +1 -1

asr_modeling.py CHANGED Viewed

@@ -185,7 +185,7 @@ class ASRModel(PreTrainedModel, GenerationMixin):
         decoder_kwargs = {
             "attn_implementation": config.attn_implementation,
             "trust_remote_code": True,
-            "tie_word_embeddings": True,
             "low_cpu_mem_usage": True,
             "dtype": dtype,
         }
@@ -247,9 +247,6 @@ class ASRModel(PreTrainedModel, GenerationMixin):
                 {"additional_special_tokens": existing_special + ["<audio>"]}
             )
             self.language_model.resize_token_embeddings(len(self.tokenizer), mean_resizing=False)
-            # Ensure lm_head stays tied to embeddings (e.g., SmolLM3)
-            if hasattr(self.language_model, "tie_weights"):
-                self.language_model.tie_weights()
         self.audio_token_id = self.tokenizer.convert_tokens_to_ids("<audio>")
         self.tokenizer.padding_side = "right"

         decoder_kwargs = {
             "attn_implementation": config.attn_implementation,
             "trust_remote_code": True,
+            "tie_word_embeddings": False,
             "low_cpu_mem_usage": True,
             "dtype": dtype,
         }
                 {"additional_special_tokens": existing_special + ["<audio>"]}
             )
             self.language_model.resize_token_embeddings(len(self.tokenizer), mean_resizing=False)
         self.audio_token_id = self.tokenizer.convert_tokens_to_ids("<audio>")
         self.tokenizer.padding_side = "right"

config.json CHANGED Viewed

@@ -160,10 +160,13 @@
   "label_smoothing": 0.0,
   "length_penalty": 1.0,
   "llm_dim": 2048,
-  "lora_alpha": 128,
-  "lora_dropout": 0.05,
   "lora_r": 64,
-  "lora_target_modules": "all-linear",
   "max_new_tokens": 256,
   "model_dtype": "bfloat16",
   "model_type": "asr_model",

   "label_smoothing": 0.0,
   "length_penalty": 1.0,
   "llm_dim": 2048,
+  "lora_alpha": 32,
+  "lora_dropout": 0.0,
   "lora_r": 64,
+  "lora_target_modules": [
+    "v_proj",
+    "q_proj"
+  ],
   "max_new_tokens": 256,
   "model_dtype": "bfloat16",
   "model_type": "asr_model",

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:31fafce011ced7b60b18e6b1a5397f393667175eeea574075f5943b936b4f84a
 size 58732960

 version https://git-lfs.github.com/spec/v1
+oid sha256:67a6855d7069aeba393046787438bccc6f944475c5db6e480e54f3523e06d5e1
 size 58732960