Training in progress, step 1000
- config.json +5 -6
- generation_config.json +1 -4
- model.safetensors +1 -1
config.json
CHANGED
@@ -236,7 +236,6 @@
   "encoder_dim": 1280,
   "freeze_projector": false,
   "freq_mask_length": 27,
-  "inference_warmup_tokens": 10,
   "label_smoothing": 0.0,
   "length_penalty": 1.0,
   "llm_dim": 2048,
@@ -252,7 +251,7 @@
     "up_proj",
     "down_proj"
   ],
-  "max_new_tokens":
+  "max_new_tokens": 128,
   "min_new_tokens": 0,
   "model_dtype": "bfloat16",
   "model_type": "asr_model",
@@ -278,7 +277,7 @@
   "repetition_penalty": 1.1,
   "router_aux_loss_coef": 0.01,
   "system_prompt": "",
-  "temperature":
+  "temperature": null,
   "text_config": {
     "_name_or_path": "HuggingFaceTB/SmolLM3-3B",
     "architectures": [
@@ -392,11 +391,11 @@
   },
   "text_model_id": "HuggingFaceTB/SmolLM3-3B",
   "time_mask_length": 100,
-  "top_k":
-  "top_p":
+  "top_k": null,
+  "top_p": null,
   "transformers_version": "5.0.0",
   "use_cache": false,
   "use_lora": false,
   "use_specaugment": true,
   "vocab_size": 128257
-}
+}
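After this commit the sampling-related defaults in config.json (temperature, top_k, top_p) are stored as null, so decoding behaviour falls back to generation_config.json, while max_new_tokens defaults to 128. A minimal sketch, assuming the checkpoint has been downloaded to a local directory named "checkpoint-1000" (a hypothetical path), that inspects these values with the standard library:

import json

# Hypothetical local checkpoint directory.
with open("checkpoint-1000/config.json") as f:
    config = json.load(f)

# Decoding length limit lives in the model config; sampling knobs are null.
print(config["max_new_tokens"])            # 128
print(config["temperature"])               # None -> defer to generation_config.json
print(config["top_k"], config["top_p"])    # None None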
generation_config.json
CHANGED
@@ -1,6 +1,6 @@
 {
   "bos_token_id": 128000,
-  "do_sample":
+  "do_sample": false,
   "eos_token_id": [
     128012
   ],
@@ -11,9 +11,6 @@
   "num_beams": 1,
   "pad_token_id": 128004,
   "repetition_penalty": 1.1,
-  "temperature": 1.0,
-  "top_k": 0,
-  "top_p": 0.01,
   "transformers_version": "5.0.0",
   "use_cache": true
 }
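With do_sample set to false and the temperature/top_k/top_p overrides removed, generation at step 1000 is effectively greedy search (num_beams 1) with a repetition penalty of 1.1. A rough equivalent expressed with transformers' GenerationConfig, a standard class in that library; the values below are copied from the diff above, not additional settings:

from transformers import GenerationConfig

# Greedy decoding settings matching generation_config.json after this commit.
generation_config = GenerationConfig(
    bos_token_id=128000,
    eos_token_id=[128012],
    pad_token_id=128004,
    do_sample=False,          # no sampling: greedy search
    num_beams=1,
    repetition_penalty=1.1,
    use_cache=True,
)
print(generation_config)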
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:4dfa2853ff5542905d337e8aeff8cbf60143d732b14880ef5d06688db14f9b80
 size 14682440
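model.safetensors is tracked with Git LFS, so the repository stores only the pointer shown above; the weight file itself (about 14.7 MB) is fetched separately. A small standard-library sketch, assuming a hypothetical local path, that checks a downloaded file against the sha256 recorded in the new pointer:

import hashlib

# Hypothetical local path to the downloaded weights.
path = "checkpoint-1000/model.safetensors"
expected = "4dfa2853ff5542905d337e8aeff8cbf60143d732b14880ef5d06688db14f9b80"

h = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        h.update(chunk)

print(h.hexdigest() == expected)  # True if the file matches the LFS pointer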