mazesmazes committed on
Commit
81e332a
·
verified ·
1 Parent(s): 10ed507

Training in progress, step 1000

Browse files
adapter_config.json CHANGED
@@ -3,7 +3,7 @@
3
  "alpha_pattern": {},
4
  "arrow_config": null,
5
  "auto_mapping": null,
6
- "base_model_name_or_path": "Qwen/Qwen3-0.6B",
7
  "bias": "none",
8
  "corda_config": null,
9
  "ensure_weight_tying": false,
@@ -29,8 +29,8 @@
29
  "rank_pattern": {},
30
  "revision": null,
31
  "target_modules": [
32
- "q_proj",
33
- "v_proj"
34
  ],
35
  "target_parameters": null,
36
  "task_type": "CAUSAL_LM",
 
3
  "alpha_pattern": {},
4
  "arrow_config": null,
5
  "auto_mapping": null,
6
+ "base_model_name_or_path": "checkpoint-1000",
7
  "bias": "none",
8
  "corda_config": null,
9
  "ensure_weight_tying": false,
 
29
  "rank_pattern": {},
30
  "revision": null,
31
  "target_modules": [
32
+ "v_proj",
33
+ "q_proj"
34
  ],
35
  "target_parameters": null,
36
  "task_type": "CAUSAL_LM",
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e4ddc5587a7c4404aef6ba30aa8c5e6068425b48c2f7bcf4b98172a4c4c027fe
3
  size 36715216
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e1d802bbf1c6c65a249c3be723f05726005ce0c40b1791db6fd73cb6417e7d00
3
  size 36715216
config.json CHANGED
@@ -143,6 +143,126 @@
143
  },
144
  "downsample_rate": 5,
145
  "dtype": "bfloat16",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
146
  "encoder_conv_layers": [
147
  [
148
  1,
@@ -169,11 +289,11 @@
169
  "v_proj"
170
  ],
171
  "mask_feature_length": 10,
172
- "mask_feature_min_masks": 0,
173
- "mask_feature_prob": 0.0,
174
- "mask_time_length": 10,
175
  "mask_time_min_masks": 2,
176
- "mask_time_prob": 0.05,
177
  "max_new_tokens": 128,
178
  "min_new_tokens": 0,
179
  "model_dtype": "bfloat16",
@@ -264,7 +384,6 @@
264
  "transformers_version": "5.0.0.dev0",
265
  "use_cache": false,
266
  "use_lora": true,
267
- "use_specaugment": false,
268
- "user_prompt": "Please transcribe this English audio into text: <audio>",
269
  "vocab_size": 151670
270
  }
 
143
  },
144
  "downsample_rate": 5,
145
  "dtype": "bfloat16",
146
+ "encoder": {
147
+ "_name_or_path": "zai-org/GLM-ASR-Nano-2512",
148
+ "architectures": [
149
+ "GlmAsrForConditionalGeneration"
150
+ ],
151
+ "audio_config": {
152
+ "_name_or_path": "",
153
+ "add_cross_attention": false,
154
+ "architectures": null,
155
+ "attention_dropout": 0.0,
156
+ "bos_token_id": null,
157
+ "chunk_size_feed_forward": 0,
158
+ "cross_attention_hidden_size": null,
159
+ "decoder_start_token_id": null,
160
+ "dtype": null,
161
+ "eos_token_id": null,
162
+ "finetuning_task": null,
163
+ "head_dim": 64,
164
+ "hidden_act": "gelu",
165
+ "hidden_size": 1280,
166
+ "id2label": {
167
+ "0": "LABEL_0",
168
+ "1": "LABEL_1"
169
+ },
170
+ "initializer_range": 0.02,
171
+ "intermediate_size": 5120,
172
+ "is_decoder": false,
173
+ "is_encoder_decoder": false,
174
+ "label2id": {
175
+ "LABEL_0": 0,
176
+ "LABEL_1": 1
177
+ },
178
+ "max_position_embeddings": 1500,
179
+ "model_type": "glmasr_encoder",
180
+ "num_attention_heads": 20,
181
+ "num_hidden_layers": 32,
182
+ "num_key_value_heads": 20,
183
+ "num_mel_bins": 128,
184
+ "output_attentions": false,
185
+ "output_hidden_states": false,
186
+ "pad_token_id": null,
187
+ "partial_rotary_factor": 0.5,
188
+ "prefix": null,
189
+ "problem_type": null,
190
+ "return_dict": true,
191
+ "rope_parameters": {
192
+ "partial_rotary_factor": 0.5,
193
+ "rope_theta": 10000.0,
194
+ "rope_type": "default"
195
+ },
196
+ "sep_token_id": null,
197
+ "task_specific_params": null,
198
+ "tie_word_embeddings": true,
199
+ "tokenizer_class": null
200
+ },
201
+ "audio_token_id": 59260,
202
+ "dtype": "bfloat16",
203
+ "hidden_size": 2048,
204
+ "model_type": "glmasr",
205
+ "num_mel_bins": 128,
206
+ "projector_hidden_act": "gelu",
207
+ "text_config": {
208
+ "_name_or_path": "",
209
+ "add_cross_attention": false,
210
+ "architectures": null,
211
+ "attention_bias": false,
212
+ "attention_dropout": 0.0,
213
+ "bos_token_id": 1,
214
+ "chunk_size_feed_forward": 0,
215
+ "cross_attention_hidden_size": null,
216
+ "decoder_start_token_id": null,
217
+ "dtype": null,
218
+ "eos_token_id": [
219
+ 59246,
220
+ 59253,
221
+ 59255
222
+ ],
223
+ "finetuning_task": null,
224
+ "head_dim": 128,
225
+ "hidden_act": "silu",
226
+ "hidden_size": 2048,
227
+ "id2label": {
228
+ "0": "LABEL_0",
229
+ "1": "LABEL_1"
230
+ },
231
+ "initializer_range": 0.02,
232
+ "intermediate_size": 6144,
233
+ "is_decoder": false,
234
+ "is_encoder_decoder": false,
235
+ "label2id": {
236
+ "LABEL_0": 0,
237
+ "LABEL_1": 1
238
+ },
239
+ "max_position_embeddings": 8192,
240
+ "mlp_bias": false,
241
+ "model_type": "llama",
242
+ "num_attention_heads": 16,
243
+ "num_hidden_layers": 28,
244
+ "num_key_value_heads": 4,
245
+ "output_attentions": false,
246
+ "output_hidden_states": false,
247
+ "pad_token_id": null,
248
+ "prefix": null,
249
+ "pretraining_tp": 1,
250
+ "problem_type": null,
251
+ "return_dict": true,
252
+ "rms_norm_eps": 1e-05,
253
+ "rope_parameters": {
254
+ "rope_theta": 10000.0,
255
+ "rope_type": "default"
256
+ },
257
+ "sep_token_id": null,
258
+ "task_specific_params": null,
259
+ "tie_word_embeddings": false,
260
+ "tokenizer_class": null,
261
+ "use_cache": true,
262
+ "vocab_size": 59264
263
+ },
264
+ "vocab_size": 59264
265
+ },
266
  "encoder_conv_layers": [
267
  [
268
  1,
 
289
  "v_proj"
290
  ],
291
  "mask_feature_length": 10,
292
+ "mask_feature_min_masks": 2,
293
+ "mask_feature_prob": 0.05,
294
+ "mask_time_length": 15,
295
  "mask_time_min_masks": 2,
296
+ "mask_time_prob": 0.1,
297
  "max_new_tokens": 128,
298
  "min_new_tokens": 0,
299
  "model_dtype": "bfloat16",
 
384
  "transformers_version": "5.0.0.dev0",
385
  "use_cache": false,
386
  "use_lora": true,
387
+ "use_specaugment": true,
 
388
  "vocab_size": 151670
389
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:313aec7864cc3fe4d2665e078ed53c50de654a555f8a0bd5feb2e7510f1c5b40
3
  size 25172384
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a2c9cdfcf8a55bad3b4c85dfa7218c8e63fb074a6b374673b0d0442d4b040bec
3
  size 25172384
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c16721198b4814a36169d05045565ed13372103aae1e2bf5a256f125117e24b7
3
  size 5201
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6cb17eae71944b178ffca38182449645e5a12a962efd2d3eb01b9a84d725d5ed
3
  size 5201