Training in progress - step 500
Browse files
- asr_modeling.py +1 -4
- config.json +6 -3
- model.safetensors +1 -1
asr_modeling.py
CHANGED
|
@@ -185,7 +185,7 @@ class ASRModel(PreTrainedModel, GenerationMixin):
|
|
| 185 |
decoder_kwargs = {
|
| 186 |
"attn_implementation": config.attn_implementation,
|
| 187 |
"trust_remote_code": True,
|
| 188 |
-
"tie_word_embeddings": True,
|
| 189 |
"low_cpu_mem_usage": True,
|
| 190 |
"dtype": dtype,
|
| 191 |
}
|
|
@@ -247,9 +247,6 @@ class ASRModel(PreTrainedModel, GenerationMixin):
|
|
| 247 |
{"additional_special_tokens": existing_special + ["<audio>"]}
|
| 248 |
)
|
| 249 |
self.language_model.resize_token_embeddings(len(self.tokenizer), mean_resizing=False)
|
| 250 |
-
# Ensure lm_head stays tied to embeddings (e.g., SmolLM3)
|
| 251 |
-
if hasattr(self.language_model, "tie_weights"):
|
| 252 |
-
self.language_model.tie_weights()
|
| 253 |
|
| 254 |
self.audio_token_id = self.tokenizer.convert_tokens_to_ids("<audio>")
|
| 255 |
self.tokenizer.padding_side = "right"
|
|
|
|
| 185 |
decoder_kwargs = {
|
| 186 |
"attn_implementation": config.attn_implementation,
|
| 187 |
"trust_remote_code": True,
|
| 188 |
+
"tie_word_embeddings": False,
|
| 189 |
"low_cpu_mem_usage": True,
|
| 190 |
"dtype": dtype,
|
| 191 |
}
|
|
|
|
| 247 |
{"additional_special_tokens": existing_special + ["<audio>"]}
|
| 248 |
)
|
| 249 |
self.language_model.resize_token_embeddings(len(self.tokenizer), mean_resizing=False)
|
|
|
|
|
|
|
|
|
|
| 250 |
|
| 251 |
self.audio_token_id = self.tokenizer.convert_tokens_to_ids("<audio>")
|
| 252 |
self.tokenizer.padding_side = "right"
|
config.json
CHANGED
|
@@ -160,10 +160,13 @@
|
|
| 160 |
"label_smoothing": 0.0,
|
| 161 |
"length_penalty": 1.0,
|
| 162 |
"llm_dim": 2048,
|
| 163 |
-
"lora_alpha":
|
| 164 |
-
"lora_dropout": 0.
|
| 165 |
"lora_r": 64,
|
| 166 |
-
"lora_target_modules":
|
|
|
|
|
|
|
|
|
|
| 167 |
"max_new_tokens": 256,
|
| 168 |
"model_dtype": "bfloat16",
|
| 169 |
"model_type": "asr_model",
|
|
|
|
| 160 |
"label_smoothing": 0.0,
|
| 161 |
"length_penalty": 1.0,
|
| 162 |
"llm_dim": 2048,
|
| 163 |
+
"lora_alpha": 32,
|
| 164 |
+
"lora_dropout": 0.0,
|
| 165 |
"lora_r": 64,
|
| 166 |
+
"lora_target_modules": [
|
| 167 |
+
"v_proj",
|
| 168 |
+
"q_proj"
|
| 169 |
+
],
|
| 170 |
"max_new_tokens": 256,
|
| 171 |
"model_dtype": "bfloat16",
|
| 172 |
"model_type": "asr_model",
|
model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 58732960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:67a6855d7069aeba393046787438bccc6f944475c5db6e480e54f3523e06d5e1
|
| 3 |
size 58732960
|