mazesmazes committed on
Commit
83fef99
·
verified ·
1 Parent(s): c19c0e3

Training in progress, step 500

Browse files
Files changed (2) hide show
  1. config.json +5 -5
  2. model.safetensors +2 -2
config.json CHANGED
@@ -160,7 +160,7 @@
160
  "inference_warmup_tokens": 10,
161
  "label_smoothing": 0.0,
162
  "length_penalty": 1.0,
163
- "llm_dim": 1024,
164
  "max_new_tokens": 96,
165
  "model_dtype": "bfloat16",
166
  "model_type": "asr_model",
@@ -185,7 +185,7 @@
185
  "router_aux_loss_coef": 0.01,
186
  "system_prompt": "/no_think /system_override",
187
  "text_config": {
188
- "_name_or_path": "Qwen/Qwen3-0.6B",
189
  "architectures": [
190
  "Qwen3ForCausalLM"
191
  ],
@@ -195,9 +195,9 @@
195
  "eos_token_id": 151645,
196
  "head_dim": 128,
197
  "hidden_act": "silu",
198
- "hidden_size": 1024,
199
  "initializer_range": 0.02,
200
- "intermediate_size": 3072,
201
  "layer_types": [
202
  "full_attention",
203
  "full_attention",
@@ -246,7 +246,7 @@
246
  "use_sliding_window": false,
247
  "vocab_size": 151670
248
  },
249
- "text_model_id": "Qwen/Qwen3-0.6B",
250
  "transformers_version": "5.0.0.dev0",
251
  "use_cache": false,
252
  "use_specaugment": true,
 
160
  "inference_warmup_tokens": 10,
161
  "label_smoothing": 0.0,
162
  "length_penalty": 1.0,
163
+ "llm_dim": 2048,
164
  "max_new_tokens": 96,
165
  "model_dtype": "bfloat16",
166
  "model_type": "asr_model",
 
185
  "router_aux_loss_coef": 0.01,
186
  "system_prompt": "/no_think /system_override",
187
  "text_config": {
188
+ "_name_or_path": "Qwen/Qwen3-1.7B",
189
  "architectures": [
190
  "Qwen3ForCausalLM"
191
  ],
 
195
  "eos_token_id": 151645,
196
  "head_dim": 128,
197
  "hidden_act": "silu",
198
+ "hidden_size": 2048,
199
  "initializer_range": 0.02,
200
+ "intermediate_size": 6144,
201
  "layer_types": [
202
  "full_attention",
203
  "full_attention",
 
246
  "use_sliding_window": false,
247
  "vocab_size": 151670
248
  },
249
+ "text_model_id": "Qwen/Qwen3-1.7B",
250
  "transformers_version": "5.0.0.dev0",
251
  "use_cache": false,
252
  "use_specaugment": true,
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e4f1083de8fd34af265b1d0a74a11b9b907782012a3c26f2eeb1bfe1dce6451a
3
- size 12583152
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:75181d026149248d64cd1870a42401228a0dffb60c08d1c6ebd934ea2d642a47
3
+ size 58732960