owaski committed on
Commit
c36668b
·
verified ·
1 Parent(s): dbdd40e

Upload folder using huggingface_hub

Browse files
args.json CHANGED
@@ -164,7 +164,7 @@
164
  "deterministic_mode": false,
165
  "train_iters": null,
166
  "log_interval": 10,
167
- "tensorboard_dir": "/data/user_data/siqiouya/ckpts/infinisst-omni/gigaspeech-de-s_origin/v2-20260103-210544/runs",
168
  "no_masked_softmax_fusion": false,
169
  "no_bias_dropout_fusion": false,
170
  "no_bias_swiglu_fusion": false,
@@ -198,7 +198,7 @@
198
  "adam_beta2": 0.95,
199
  "adam_eps": 1e-08,
200
  "sgd_momentum": 0.9,
201
- "save": "/data/user_data/siqiouya/ckpts/infinisst-omni/gigaspeech-de-s_origin/v2-20260103-210544",
202
  "save_interval": 200,
203
  "no_save_optim": true,
204
  "no_save_rng": true,
@@ -316,10 +316,10 @@
316
  "local_world_size": 2,
317
  "model_suffix": "Qwen3-Omni-30B-A3B-Instruct",
318
  "model_info": "ModelInfo(model_type='qwen3_omni', model_dir='/data/user_data/siqiouya/ckpts/pretrained/llm/Qwen3-Omni-30B-A3B-Instruct', torch_dtype=torch.bfloat16, max_model_len=65536, quant_method=None, quant_bits=None, rope_scaling={'interleaved': True, 'mrope_section': [24, 20, 20], 'rope_type': 'default', 'type': 'default'}, is_moe_model=True, config=None, task_type='causal_lm', num_labels=None)",
319
- "model_meta": "ModelMeta(model_type='qwen3_omni', model_groups=[ModelGroup(models=[Model(ms_model_id='Qwen/Qwen3-Omni-30B-A3B-Instruct', hf_model_id='Qwen/Qwen3-Omni-30B-A3B-Instruct', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-Omni-30B-A3B-Thinking', hf_model_id='Qwen/Qwen3-Omni-30B-A3B-Thinking', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-Omni-30B-A3B-Captioner', hf_model_id='Qwen/Qwen3-Omni-30B-A3B-Captioner', model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=None, tags=[])], template='qwen3_omni', get_function=<function get_model_tokenizer_qwen3_omni at 0x14ff6ec0e840>, model_arch=MultiModelKeys(arch_name='qwen3_omni', embedding=None, module_list=None, lm_head=None, q_proj=None, k_proj=None, v_proj=None, o_proj=None, attention=None, mlp=None, down_proj=None, qkv_proj=None, qk_proj=None, qa_proj=None, qb_proj=None, kv_proj=None, kva_proj=None, kvb_proj=None, language_model=['thinker.model'], aligner=['thinker.audio_tower.proj1', 'thinker.audio_tower.proj2', 'thinker.visual.merger', 'thinker.visual.merger_list'], vision_tower=['thinker.audio_tower', 'thinker.visual'], generator=['talker', 'token2wav']), architectures=['Qwen3OmniMoeForConditionalGeneration'], additional_saved_files=[], torch_dtype=None, is_multimodal=True, is_reward=False, task_type=None, ignore_patterns=None, requires=['transformers>=4.57.dev0', 'soundfile', 'decord', 'qwen_omni_utils'], tags=['vision', 'video', 'audio'])",
320
  "model_dir": "/data/user_data/siqiouya/ckpts/pretrained/llm/Qwen3-Omni-30B-A3B-Instruct",
321
  "hub": "<class 'swift.hub.hub.MSHub'>",
322
- "megatron_model_meta": "MMGPTMegatronModelMeta(megatron_model_type='qwen3_omni', model_types=['qwen3_omni'], convert_mcore2hf=<function convert_mcore2hf_qwen3_omni at 0x14fed8295120>, convert_hf2mcore=<function convert_hf2mcore_qwen3_omni at 0x14fed8295080>, model_cls=<class 'swift.megatron.model.mm_gpt.qwen3_vl.Qwen3VLGPTModel'>, convert_hf_config=<function convert_gpt_hf_config at 0x14fed8449800>, get_transformer_layer_spec=None, model_provider=<function model_provider at 0x14fed84485e0>, visual_cls=<class 'swift.megatron.model.mm_gpt.qwen3_vl.Qwen3Omni_Vit'>, extra_args_provider=None)",
323
  "extra_args": {
324
  "model": "/data/user_data/siqiouya/ckpts/pretrained/llm/Qwen3-Omni-30B-A3B-Instruct/",
325
  "model_type": "qwen3_omni",
@@ -474,7 +474,7 @@
474
  "mrope_interleaved": true,
475
  "add_version": true,
476
  "model_info": "ModelInfo(model_type='qwen3_omni', model_dir='/data/user_data/siqiouya/ckpts/pretrained/llm/Qwen3-Omni-30B-A3B-Instruct', torch_dtype=torch.bfloat16, max_model_len=65536, quant_method=None, quant_bits=None, rope_scaling={'interleaved': True, 'mrope_section': [24, 20, 20], 'rope_type': 'default', 'type': 'default'}, is_moe_model=True, config=None, task_type='causal_lm', num_labels=None)",
477
- "model_meta": "ModelMeta(model_type='qwen3_omni', model_groups=[ModelGroup(models=[Model(ms_model_id='Qwen/Qwen3-Omni-30B-A3B-Instruct', hf_model_id='Qwen/Qwen3-Omni-30B-A3B-Instruct', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-Omni-30B-A3B-Thinking', hf_model_id='Qwen/Qwen3-Omni-30B-A3B-Thinking', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-Omni-30B-A3B-Captioner', hf_model_id='Qwen/Qwen3-Omni-30B-A3B-Captioner', model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=None, tags=[])], template='qwen3_omni', get_function=<function get_model_tokenizer_qwen3_omni at 0x14ff6ec0e840>, model_arch=MultiModelKeys(arch_name='qwen3_omni', embedding=None, module_list=None, lm_head=None, q_proj=None, k_proj=None, v_proj=None, o_proj=None, attention=None, mlp=None, down_proj=None, qkv_proj=None, qk_proj=None, qa_proj=None, qb_proj=None, kv_proj=None, kva_proj=None, kvb_proj=None, language_model=['thinker.model'], aligner=['thinker.audio_tower.proj1', 'thinker.audio_tower.proj2', 'thinker.visual.merger', 'thinker.visual.merger_list'], vision_tower=['thinker.audio_tower', 'thinker.visual'], generator=['talker', 'token2wav']), architectures=['Qwen3OmniMoeForConditionalGeneration'], additional_saved_files=[], torch_dtype=None, is_multimodal=True, is_reward=False, task_type=None, ignore_patterns=None, requires=['transformers>=4.57.dev0', 'soundfile', 'decord', 'qwen_omni_utils'], tags=['vision', 'video', 'audio'])",
478
- "megatron_model_meta": "MMGPTMegatronModelMeta(megatron_model_type='qwen3_omni', model_types=['qwen3_omni'], convert_mcore2hf=<function convert_mcore2hf_qwen3_omni at 0x14fed8295120>, convert_hf2mcore=<function convert_hf2mcore_qwen3_omni at 0x14fed8295080>, model_cls=<class 'swift.megatron.model.mm_gpt.qwen3_vl.Qwen3VLGPTModel'>, convert_hf_config=<function convert_gpt_hf_config at 0x14fed8449800>, get_transformer_layer_spec=None, model_provider=<function model_provider at 0x14fed84485e0>, visual_cls=<class 'swift.megatron.model.mm_gpt.qwen3_vl.Qwen3Omni_Vit'>, extra_args_provider=None)"
479
  }
480
  }
 
164
  "deterministic_mode": false,
165
  "train_iters": null,
166
  "log_interval": 10,
167
+ "tensorboard_dir": "/data/user_data/siqiouya/ckpts/infinisst-omni/gigaspeech-de-s_origin/v3-20260104-122937/runs",
168
  "no_masked_softmax_fusion": false,
169
  "no_bias_dropout_fusion": false,
170
  "no_bias_swiglu_fusion": false,
 
198
  "adam_beta2": 0.95,
199
  "adam_eps": 1e-08,
200
  "sgd_momentum": 0.9,
201
+ "save": "/data/user_data/siqiouya/ckpts/infinisst-omni/gigaspeech-de-s_origin/v3-20260104-122937",
202
  "save_interval": 200,
203
  "no_save_optim": true,
204
  "no_save_rng": true,
 
316
  "local_world_size": 2,
317
  "model_suffix": "Qwen3-Omni-30B-A3B-Instruct",
318
  "model_info": "ModelInfo(model_type='qwen3_omni', model_dir='/data/user_data/siqiouya/ckpts/pretrained/llm/Qwen3-Omni-30B-A3B-Instruct', torch_dtype=torch.bfloat16, max_model_len=65536, quant_method=None, quant_bits=None, rope_scaling={'interleaved': True, 'mrope_section': [24, 20, 20], 'rope_type': 'default', 'type': 'default'}, is_moe_model=True, config=None, task_type='causal_lm', num_labels=None)",
319
+ "model_meta": "ModelMeta(model_type='qwen3_omni', model_groups=[ModelGroup(models=[Model(ms_model_id='Qwen/Qwen3-Omni-30B-A3B-Instruct', hf_model_id='Qwen/Qwen3-Omni-30B-A3B-Instruct', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-Omni-30B-A3B-Thinking', hf_model_id='Qwen/Qwen3-Omni-30B-A3B-Thinking', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-Omni-30B-A3B-Captioner', hf_model_id='Qwen/Qwen3-Omni-30B-A3B-Captioner', model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=None, tags=[])], template='qwen3_omni', get_function=<function get_model_tokenizer_qwen3_omni at 0x7f95eb9e6980>, model_arch=MultiModelKeys(arch_name='qwen3_omni', embedding=None, module_list=None, lm_head=None, q_proj=None, k_proj=None, v_proj=None, o_proj=None, attention=None, mlp=None, down_proj=None, qkv_proj=None, qk_proj=None, qa_proj=None, qb_proj=None, kv_proj=None, kva_proj=None, kvb_proj=None, language_model=['thinker.model'], aligner=['thinker.audio_tower.proj1', 'thinker.audio_tower.proj2', 'thinker.visual.merger', 'thinker.visual.merger_list'], vision_tower=['thinker.audio_tower', 'thinker.visual'], generator=['talker', 'token2wav']), architectures=['Qwen3OmniMoeForConditionalGeneration'], additional_saved_files=[], torch_dtype=None, is_multimodal=True, is_reward=False, task_type=None, ignore_patterns=None, requires=['transformers>=4.57.dev0', 'soundfile', 'decord', 'qwen_omni_utils'], tags=['vision', 'video', 'audio'])",
320
  "model_dir": "/data/user_data/siqiouya/ckpts/pretrained/llm/Qwen3-Omni-30B-A3B-Instruct",
321
  "hub": "<class 'swift.hub.hub.MSHub'>",
322
+ "megatron_model_meta": "MMGPTMegatronModelMeta(megatron_model_type='qwen3_omni', model_types=['qwen3_omni'], convert_mcore2hf=<function convert_mcore2hf_qwen3_omni at 0x7f9554771260>, convert_hf2mcore=<function convert_hf2mcore_qwen3_omni at 0x7f95547711c0>, model_cls=<class 'swift.megatron.model.mm_gpt.qwen3_vl.Qwen3VLGPTModel'>, convert_hf_config=<function convert_gpt_hf_config at 0x7f9554935940>, get_transformer_layer_spec=None, model_provider=<function model_provider at 0x7f9554934720>, visual_cls=<class 'swift.megatron.model.mm_gpt.qwen3_vl.Qwen3Omni_Vit'>, extra_args_provider=None)",
323
  "extra_args": {
324
  "model": "/data/user_data/siqiouya/ckpts/pretrained/llm/Qwen3-Omni-30B-A3B-Instruct/",
325
  "model_type": "qwen3_omni",
 
474
  "mrope_interleaved": true,
475
  "add_version": true,
476
  "model_info": "ModelInfo(model_type='qwen3_omni', model_dir='/data/user_data/siqiouya/ckpts/pretrained/llm/Qwen3-Omni-30B-A3B-Instruct', torch_dtype=torch.bfloat16, max_model_len=65536, quant_method=None, quant_bits=None, rope_scaling={'interleaved': True, 'mrope_section': [24, 20, 20], 'rope_type': 'default', 'type': 'default'}, is_moe_model=True, config=None, task_type='causal_lm', num_labels=None)",
477
+ "model_meta": "ModelMeta(model_type='qwen3_omni', model_groups=[ModelGroup(models=[Model(ms_model_id='Qwen/Qwen3-Omni-30B-A3B-Instruct', hf_model_id='Qwen/Qwen3-Omni-30B-A3B-Instruct', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-Omni-30B-A3B-Thinking', hf_model_id='Qwen/Qwen3-Omni-30B-A3B-Thinking', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-Omni-30B-A3B-Captioner', hf_model_id='Qwen/Qwen3-Omni-30B-A3B-Captioner', model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=None, tags=[])], template='qwen3_omni', get_function=<function get_model_tokenizer_qwen3_omni at 0x7f95eb9e6980>, model_arch=MultiModelKeys(arch_name='qwen3_omni', embedding=None, module_list=None, lm_head=None, q_proj=None, k_proj=None, v_proj=None, o_proj=None, attention=None, mlp=None, down_proj=None, qkv_proj=None, qk_proj=None, qa_proj=None, qb_proj=None, kv_proj=None, kva_proj=None, kvb_proj=None, language_model=['thinker.model'], aligner=['thinker.audio_tower.proj1', 'thinker.audio_tower.proj2', 'thinker.visual.merger', 'thinker.visual.merger_list'], vision_tower=['thinker.audio_tower', 'thinker.visual'], generator=['talker', 'token2wav']), architectures=['Qwen3OmniMoeForConditionalGeneration'], additional_saved_files=[], torch_dtype=None, is_multimodal=True, is_reward=False, task_type=None, ignore_patterns=None, requires=['transformers>=4.57.dev0', 'soundfile', 'decord', 'qwen_omni_utils'], tags=['vision', 'video', 'audio'])",
478
+ "megatron_model_meta": "MMGPTMegatronModelMeta(megatron_model_type='qwen3_omni', model_types=['qwen3_omni'], convert_mcore2hf=<function convert_mcore2hf_qwen3_omni at 0x7f9554771260>, convert_hf2mcore=<function convert_hf2mcore_qwen3_omni at 0x7f95547711c0>, model_cls=<class 'swift.megatron.model.mm_gpt.qwen3_vl.Qwen3VLGPTModel'>, convert_hf_config=<function convert_gpt_hf_config at 0x7f9554935940>, get_transformer_layer_spec=None, model_provider=<function model_provider at 0x7f9554934720>, visual_cls=<class 'swift.megatron.model.mm_gpt.qwen3_vl.Qwen3Omni_Vit'>, extra_args_provider=None)"
479
  }
480
  }
model-00001-of-00015.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4979f2c1a4613af321179fae0bf2afb12f06a1acaac58f7ad2f03d7dd504d04b
3
  size 4997899632
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d6069bad6e17ea36f1239c1ebb287a524ddba94f1bc64d2aa4bada8549c2f2d9
3
  size 4997899632
model-00002-of-00015.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:afe018558c35290c3447946815805b5add90065574a33e9aa750a8b6b942b2db
3
  size 4997754216
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a6af89fe6a7f523735a6288725878612b36a486277455a6c77e6682768ea16b2
3
  size 4997754216
model-00003-of-00015.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4f2c0a543df11913eb5e341df731d08bc75b19b99390ce5cb5cc09f461c1e16a
3
  size 4997754216
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e8f10cf45dd2096fb7c19f291a9a8da4748242e6ed620921cc59a0664494fecc
3
  size 4997754216
model-00004-of-00015.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e865e36440343d77fd6f5d27133793f3ac8fb1c7b79eac7abd8ecf86e6e1f45f
3
  size 4997755648
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:682068eac184eeacdcd75114ca58af815d5f58f6c0d64827627b0a2533deb306
3
  size 4997755648
model-00005-of-00015.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:848a7deaa4acfcb1b172a424aade32aeb6fc2973ccb55682636b07761d107453
3
  size 4997755792
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5fe25c92bd6b0c3786664206f21ca4b19bbdc74bce417bd87bfc333250d6554a
3
  size 4997755792
model-00006-of-00015.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3d29e8c075f0ec4d6a6fe9b2f8d0048a499e74d0de70cb81aadf4a20d1d27732
3
  size 4997755792
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e720b0ab4936a2640287d7f7ebf5a5e9319df23be20c7c3533fb22bbd720503a
3
  size 4997755792
model-00007-of-00015.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f958c6fb519b3ccb519032e1ac820e349d0a37414ffda0e2b0f25edbe5393508
3
  size 4997755792
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed86ac987d76a4e00c722d1fc5efa4c7a1939a64c0a296a8c3770ad3f19d807c
3
  size 4997755792
model-00008-of-00015.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f264611759b16e01ea5a2a680f89c964e0cb21154ad69da2d0361a4513640871
3
  size 4997755792
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f62dcb82d0c3af2c6b405b194dfccd875b7b9c55963d714374105eb736a49551
3
  size 4997755792
model-00009-of-00015.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8432c476d888dfebd5d79000e58190d000970c0c5ad920b18657da1496a03c4d
3
  size 4997755792
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4bd9363fbe9cd71ec64953cf9b43a3ffe84aa902aae4fd07a0af6d8d32be394
3
  size 4997755792
model-00010-of-00015.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5660ec766984413d79b5d1dabd8e923d2d77bda9052664b28111840203b0ef6f
3
  size 4997755792
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:28606ecfea70ab0c54be5329ce6f4e9aa2b09c93094e769ecec808d38ec52813
3
  size 4997755792
model-00011-of-00015.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:86f3ea5d5cd244f92abe9b2c23741ae126eb3f27fdec0c0d5b3376dd6fa8debb
3
  size 4997755792
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9f0d3e89e8ccf55518a0680b64d88abc6b5dee646bf3d3f662fb77b861b0edd7
3
  size 4997755792
model-00012-of-00015.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b715267667c207f02a9172f9d7409aa252b272de0ae8fbe118f1bf0d6ba68aaf
3
  size 4997755792
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b8e7112d90fcd38f9df96d7cb18c1dee7076a61c59aa47d4784a3fa24b7ed3ac
3
  size 4997755792
model-00013-of-00015.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:376927de21b8c2888ef1da50dc4d604bb9af7814263a75a31508b83301f82aa8
3
  size 4999771808
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:359228b4a087564f3f0f2171a0711a02d455347bf0cb3760918d04fe8298b9e8
3
  size 4999771808