mazesmazes committed on
Commit
030c3cb
·
verified ·
1 Parent(s): a0db2d6

Training in progress - step 500

Browse files
Files changed (3) hide show
  1. asr_modeling.py +1 -4
  2. config.json +6 -3
  3. model.safetensors +1 -1
asr_modeling.py CHANGED
@@ -185,7 +185,7 @@ class ASRModel(PreTrainedModel, GenerationMixin):
185
  decoder_kwargs = {
186
  "attn_implementation": config.attn_implementation,
187
  "trust_remote_code": True,
188
- "tie_word_embeddings": True,
189
  "low_cpu_mem_usage": True,
190
  "dtype": dtype,
191
  }
@@ -247,9 +247,6 @@ class ASRModel(PreTrainedModel, GenerationMixin):
247
  {"additional_special_tokens": existing_special + ["<audio>"]}
248
  )
249
  self.language_model.resize_token_embeddings(len(self.tokenizer), mean_resizing=False)
250
- # Ensure lm_head stays tied to embeddings (e.g., SmolLM3)
251
- if hasattr(self.language_model, "tie_weights"):
252
- self.language_model.tie_weights()
253
 
254
  self.audio_token_id = self.tokenizer.convert_tokens_to_ids("<audio>")
255
  self.tokenizer.padding_side = "right"
 
185
  decoder_kwargs = {
186
  "attn_implementation": config.attn_implementation,
187
  "trust_remote_code": True,
188
+ "tie_word_embeddings": False,
189
  "low_cpu_mem_usage": True,
190
  "dtype": dtype,
191
  }
 
247
  {"additional_special_tokens": existing_special + ["<audio>"]}
248
  )
249
  self.language_model.resize_token_embeddings(len(self.tokenizer), mean_resizing=False)
 
 
 
250
 
251
  self.audio_token_id = self.tokenizer.convert_tokens_to_ids("<audio>")
252
  self.tokenizer.padding_side = "right"
config.json CHANGED
@@ -160,10 +160,13 @@
160
  "label_smoothing": 0.0,
161
  "length_penalty": 1.0,
162
  "llm_dim": 2048,
163
- "lora_alpha": 128,
164
- "lora_dropout": 0.05,
165
  "lora_r": 64,
166
- "lora_target_modules": "all-linear",
 
 
 
167
  "max_new_tokens": 256,
168
  "model_dtype": "bfloat16",
169
  "model_type": "asr_model",
 
160
  "label_smoothing": 0.0,
161
  "length_penalty": 1.0,
162
  "llm_dim": 2048,
163
+ "lora_alpha": 32,
164
+ "lora_dropout": 0.0,
165
  "lora_r": 64,
166
+ "lora_target_modules": [
167
+ "v_proj",
168
+ "q_proj"
169
+ ],
170
  "max_new_tokens": 256,
171
  "model_dtype": "bfloat16",
172
  "model_type": "asr_model",
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:31fafce011ced7b60b18e6b1a5397f393667175eeea574075f5943b936b4f84a
3
  size 58732960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:67a6855d7069aeba393046787438bccc6f944475c5db6e480e54f3523e06d5e1
3
  size 58732960