diff --git a/lisa-ivl2-2b_aati_sr/ckpt_model/config.json b/lisa-ivl2-2b_aati_sr/ckpt_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..08adce055311f65625c6d1e4a373a39fb1a5db0c --- /dev/null +++ b/lisa-ivl2-2b_aati_sr/ckpt_model/config.json @@ -0,0 +1,87 @@ +{ + "architectures": [ + "InternVL3Self" + ], + "auto_map": { + "AutoConfig": "configuration_internvl_chat.InternVLChatConfig", + "AutoModel": "modeling_internvl_chat.InternVLChatModel", + "AutoModelForCausalLM": "modeling_internvl_chat.InternVLChatModel" + }, + "downsample_ratio": 0.5, + "dtype": "bfloat16", + "dynamic_image_size": true, + "eos_token_id": 2, + "force_image_size": 448, + "llm_config": { + "_name_or_path": "internlm/internlm2-chat-1_8b", + "architectures": [ + "InternLM2ForCausalLM" + ], + "attn_implementation": "eager", + "auto_map": { + "AutoConfig": "configuration_internlm2.InternLM2Config", + "AutoModel": "modeling_internlm2.InternLM2ForCausalLM", + "AutoModelForCausalLM": "modeling_internlm2.InternLM2ForCausalLM" + }, + "bias": false, + "dtype": "bfloat16", + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 32768, + "model_type": "internlm2", + "num_attention_heads": 16, + "num_hidden_layers": 24, + "num_key_value_heads": 8, + "pad_token_id": 2, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 2.0, + "type": "dynamic" + }, + "rope_theta": 1000000, + "use_bfloat16": true, + "use_cache": true, + "vocab_size": 92555 + }, + "max_dynamic_patch": 12, + "min_dynamic_patch": 1, + "model_type": "internvl_chat", + "output_attentions": false, + "pad_token_id": 2, + "ps_version": "v2", + "select_layer": -1, + "template": "internlm2-chat", + "tie_word_embeddings": false, + "transformers_version": null, + "use_backbone_lora": 0, + "use_llm_lora": 0, + "use_thumbnail": true, + "vision_config": { + "architectures": [ + "InternVisionModel" + ], + "attention_dropout": 0.0, + "drop_path_rate": 0.0, + "dropout": 0.0, + "dtype": "bfloat16", + "hidden_act": "gelu", + "hidden_size": 1024, + "image_size": 448, + "initializer_factor": 1.0, + "initializer_range": 0.02, + "intermediate_size": 4096, + "layer_norm_eps": 1e-06, + "model_type": "intern_vit_6b", + "norm_type": "layer_norm", + "num_attention_heads": 16, + "num_channels": 3, + "num_hidden_layers": 24, + "patch_size": 14, + "qk_normalization": false, + "qkv_bias": true, + "use_bfloat16": true, + "use_flash_attn": true + } +} diff --git a/lisa-ivl2-2b_aati_sr/ckpt_model/model.safetensors b/lisa-ivl2-2b_aati_sr/ckpt_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..00251351e46e3904ba11f6b6b209dde89127f1da --- /dev/null +++ b/lisa-ivl2-2b_aati_sr/ckpt_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db228d0f6958ce4561ad6c7b804104e6611700346e0490d1a409bdae9a01ee37 +size 4470345504 diff --git a/lisa-ivl2-2b_aati_sr/ckpt_model/training_args.bin b/lisa-ivl2-2b_aati_sr/ckpt_model/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..0302493ac5c7d9db7d520a065b0c09fc54750d5d --- /dev/null +++ b/lisa-ivl2-2b_aati_sr/ckpt_model/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af5483cf43ae9904b7a17b8cfc6f37988cd9b04cff3dd97add6d7329ce105964 +size 7352 diff --git a/lisa-ivl2-2b_aati_sr/evaluation_metrics.json b/lisa-ivl2-2b_aati_sr/evaluation_metrics.json new file mode 100644 index 0000000000000000000000000000000000000000..d406fa129ade3fa7eb5416bfdd2fa59dcf365c8e --- /dev/null +++ b/lisa-ivl2-2b_aati_sr/evaluation_metrics.json @@ -0,0 +1,176 @@ +[ + { + "val_dataset": "ReasonSeg|val", + "epoch": 1.0, + "eval_giou": 0.4299730360507965, + "eval_ciou": 0.5639147162437439 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 2.0, + "eval_giou": 0.4750661551952362, + "eval_ciou": 0.49450477957725525 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 3.0, + "eval_giou": 0.5029056668281555, + "eval_ciou": 0.5545530319213867 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 4.0, + "eval_giou": 0.5038564205169678, + "eval_ciou": 0.5760622024536133 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 5.0, + "eval_giou": 0.4931972026824951, + "eval_ciou": 0.5593799352645874 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 6.0, + "eval_giou": 0.5129929780960083, + "eval_ciou": 0.5596947073936462 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 7.0, + "eval_giou": 0.5289523005485535, + "eval_ciou": 0.5600836873054504 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 8.0, + "eval_giou": 0.5335568785667419, + "eval_ciou": 0.5150668621063232 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 9.0, + "eval_giou": 0.5301417708396912, + "eval_ciou": 0.6077501773834229 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 10.0, + "eval_giou": 0.5494945049285889, + "eval_ciou": 0.5914241075515747 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 11.0, + "eval_giou": 0.5493737459182739, + "eval_ciou": 0.6159847974777222 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 12.0, + "eval_giou": 0.5445547699928284, + "eval_ciou": 0.5939457416534424 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 13.0, + "eval_giou": 0.5355852842330933, + "eval_ciou": 0.5724180936813354 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 14.0, + "eval_giou": 0.5353021621704102, + "eval_ciou": 0.5821977257728577 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 15.0, + "eval_giou": 0.5353786945343018, + "eval_ciou": 0.5801292061805725 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 16.0, + "eval_giou": 0.5340729355812073, + "eval_ciou": 0.577383816242218 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 17.0, + "eval_giou": 0.5309929251670837, + "eval_ciou": 0.5779208540916443 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 18.0, + "eval_giou": 0.5346851944923401, + "eval_ciou": 0.5808385014533997 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 19.0, + "eval_giou": 0.5436327457427979, + "eval_ciou": 0.6044915318489075 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 20.0, + "eval_giou": 0.5427136421203613, + "eval_ciou": 0.5983464121818542 + }, + { + "val_dataset": "ReasonSeg|test", + "epoch": 20.0, + "eval_giou": 0.534633219242096, + "eval_ciou": 0.5675567984580994 + }, + { + "val_dataset": "refcoco|unc|val", + "epoch": 20.0, + "eval_giou": 0.7851775288581848, + "eval_ciou": 0.7884453535079956 + }, + { + "val_dataset": "refcoco|unc|testA", + "epoch": 20.0, + "eval_giou": 0.8090593218803406, + "eval_ciou": 0.8159176111221313 + }, + { + "val_dataset": "refcoco|unc|testB", + "epoch": 20.0, + "eval_giou": 0.7596240043640137, + "eval_ciou": 0.7600759863853455 + }, + { + "val_dataset": "refcoco+|unc|val", + "epoch": 20.0, + "eval_giou": 0.7349771857261658, + "eval_ciou": 0.7298452258110046 + }, + { + "val_dataset": "refcoco+|unc|testA", + "epoch": 20.0, + "eval_giou": 0.7746085524559021, + "eval_ciou": 0.7730916142463684 + }, + { + "val_dataset": "refcoco+|unc|testB", + "epoch": 20.0, + "eval_giou": 0.6849038004875183, + "eval_ciou": 0.6688467264175415 + }, + { + "val_dataset": "refcocog|umd|test", + "epoch": 20.0, + "eval_giou": 0.7478209733963013, + "eval_ciou": 0.7591495513916016 + }, + { + "val_dataset": "refcocog|umd|val", + "epoch": 20.0, + "eval_giou": 0.7427729368209839, + "eval_ciou": 0.7514315247535706 + } +] \ No newline at end of file diff --git a/lisa-ivl2-2b_aati_sr/events.out.tfevents.1759176485.bask-pg0308u25a.324870.0 b/lisa-ivl2-2b_aati_sr/events.out.tfevents.1759176485.bask-pg0308u25a.324870.0 new file mode 100644 index 0000000000000000000000000000000000000000..27bc7fe4f8c290a151ddf317235f4a89b3d9b999 --- /dev/null +++ b/lisa-ivl2-2b_aati_sr/events.out.tfevents.1759176485.bask-pg0308u25a.324870.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ff85d72042556023246781cc0c4c66d6f8d335407401c64599dac3ce77e8fe1 +size 88 diff --git a/lisa-ivl2-2b_aati_sr/events.out.tfevents.1759176613.bask-pg0308u25a.335258.0 b/lisa-ivl2-2b_aati_sr/events.out.tfevents.1759176613.bask-pg0308u25a.335258.0 new file mode 100644 index 0000000000000000000000000000000000000000..c01e683d55f4f9faa9893056da2013177f8b1f35 --- /dev/null +++ b/lisa-ivl2-2b_aati_sr/events.out.tfevents.1759176613.bask-pg0308u25a.335258.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a321f870b68b8ec8959825f0bbdcc8fb471750bfc88c5d4aa6bbe242a19fcb1b +size 419822 diff --git a/lisa-ivl2-2b_aati_sr/runs/Sep29_21-10-10_bask-pg0308u25a/events.out.tfevents.1759176696.bask-pg0308u25a.335258.1 b/lisa-ivl2-2b_aati_sr/runs/Sep29_21-10-10_bask-pg0308u25a/events.out.tfevents.1759176696.bask-pg0308u25a.335258.1 new file mode 100644 index 0000000000000000000000000000000000000000..0ad84547c9f24781dbcfb4b4bf87118894f44e79 --- /dev/null +++ b/lisa-ivl2-2b_aati_sr/runs/Sep29_21-10-10_bask-pg0308u25a/events.out.tfevents.1759176696.bask-pg0308u25a.335258.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76214c162c6cd38819fd854b3631bd4d684e2e40a92bb87ddcbd7dd42ba28bff +size 221855 diff --git a/lisa-ivl2-2b_aati_sr/runs/Sep29_21-10-10_bask-pg0308u25a/events.out.tfevents.1759226831.bask-pg0308u25a.335258.2 b/lisa-ivl2-2b_aati_sr/runs/Sep29_21-10-10_bask-pg0308u25a/events.out.tfevents.1759226831.bask-pg0308u25a.335258.2 new file mode 100644 index 0000000000000000000000000000000000000000..aa6b4848d8e1a4f69d804ceeed85a74b8793a783 --- /dev/null +++ b/lisa-ivl2-2b_aati_sr/runs/Sep29_21-10-10_bask-pg0308u25a/events.out.tfevents.1759226831.bask-pg0308u25a.335258.2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8496bdaa0d7e4565e1a1a41424515e0446d9afa01b648d87e80bb325a7286963 +size 1402 diff --git a/lisa-ivl3-2b_aati_sr/ckpt_model/config.json b/lisa-ivl3-2b_aati_sr/ckpt_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1aaa0efe9e975d906488d234e8f6ec309c18056c --- /dev/null +++ b/lisa-ivl3-2b_aati_sr/ckpt_model/config.json @@ -0,0 +1,143 @@ +{ + "architectures": [ + "InternVL3Self" + ], + "auto_map": { + "AutoConfig": "configuration_internvl_chat.InternVLChatConfig", + "AutoModel": "modeling_internvl_chat.InternVLChatModel", + "AutoModelForCausalLM": "modeling_internvl_chat.InternVLChatModel" + }, + "downsample_ratio": 0.5, + "dtype": "bfloat16", + "dynamic_image_size": true, + "eos_token_id": 151645, + "force_image_size": 448, + "hidden_size": 1536, + "image_fold": null, + "llm_config": { + "_attn_implementation_autoset": true, + "_name_or_path": "./pretrained/Qwen2.5-32B-Instruct", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 151643, + "dtype": "bfloat16", + "eos_token_id": 151643, + "hidden_act": "silu", + "hidden_size": 1536, + "initializer_range": 0.02, + "intermediate_size": 8960, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 70, + "model_type": "qwen2", + "moe_config": null, + "num_attention_heads": 12, + "num_hidden_layers": 28, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 2.0, + "rope_type": "dynamic", + "type": "dynamic" + }, + "rope_theta": 1000000.0, + "sliding_window": null, + "use_bfloat16": true, + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 151676 + }, + "max_dynamic_patch": 12, + "min_dynamic_patch": 1, + "model_type": "internvl_chat", + "output_attentions": false, + "pad2square": false, + "pad_token_id": 151643, + "ps_version": "v2", + "select_layer": -1, + "system_message": null, + "template": "internvl2_5", + "tie_word_embeddings": false, + "transformers_version": null, + "use_backbone_lora": 0, + "use_llm_lora": 0, + "use_thumbnail": true, + "vision_config": { + "_attn_implementation_autoset": true, + "_name_or_path": "OpenGVLab/InternViT-6B-448px-V1-5", + "architectures": [ + "InternVisionModel" + ], + "attention_dropout": 0.0, + "auto_map": { + "AutoConfig": "configuration_intern_vit.InternVisionConfig", + "AutoModel": "modeling_intern_vit.InternVisionModel" + }, + "capacity_factor": 1.2, + "drop_path_rate": 0.1, + "dropout": 0.0, + "dtype": "bfloat16", + "eval_capacity_factor": 1.4, + "hidden_act": "gelu", + "hidden_size": 1024, + "image_size": 448, + "initializer_factor": 0.1, + "initializer_range": 1e-10, + "intermediate_size": 4096, + "laux_allreduce": "all_nodes", + "layer_norm_eps": 1e-06, + "model_type": "intern_vit_6b", + "moe_coeff_ratio": 0.5, + "moe_intermediate_size": 768, + "moe_output_scale": 4.0, + "noisy_gate_policy": "RSample_before", + "norm_type": "layer_norm", + "num_attention_heads": 16, + "num_channels": 3, + "num_experts": 8, + "num_hidden_layers": 24, + "num_routed_experts": 4, + "num_shared_experts": 4, + "patch_size": 14, + "qk_normalization": false, + "qkv_bias": true, + "shared_expert_intermediate_size": 3072, + "use_bfloat16": true, + "use_flash_attn": true, + "use_moe": false, + "use_residual": true, + "use_rts": false, + "use_weighted_residual": false + } +} diff --git a/lisa-ivl3-2b_aati_sr/ckpt_model/model.safetensors b/lisa-ivl3-2b_aati_sr/ckpt_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f47b8fe52b9f900a27680003fc321d0e434734c6 --- /dev/null +++ b/lisa-ivl3-2b_aati_sr/ckpt_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b8756ec9fa65a596154e58c2c8ffeeade5400bcf4af3f3807b310dc211a352f +size 4211070232 diff --git a/lisa-ivl3-2b_aati_sr/ckpt_model/training_args.bin b/lisa-ivl3-2b_aati_sr/ckpt_model/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..d09f1ea3f7eb3fb49830793b5eed47fd3241f52f --- /dev/null +++ b/lisa-ivl3-2b_aati_sr/ckpt_model/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a32ca3f97667c68df088ddf7ee12d0dc27112557a472d11076fc1e7cb4fada87 +size 7352 diff --git a/lisa-ivl3-2b_aati_sr/evaluation_metrics.json b/lisa-ivl3-2b_aati_sr/evaluation_metrics.json new file mode 100644 index 0000000000000000000000000000000000000000..10e51c69b1f9dd8d757066ed79621e8a112a5aad --- /dev/null +++ b/lisa-ivl3-2b_aati_sr/evaluation_metrics.json @@ -0,0 +1,182 @@ +[ + { + "val_dataset": "ReasonSeg|val", + "epoch": 1.0, + "eval_giou": 0.5756632685661316, + "eval_ciou": 0.6737087965011597 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 2.0, + "eval_giou": 0.5750120282173157, + "eval_ciou": 0.6774965524673462 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 3.0, + "eval_giou": 0.5997360348701477, + "eval_ciou": 0.6924350261688232 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 3.0, + "eval_giou": 0.5967223048210144, + "eval_ciou": 0.6778789162635803 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 4.0, + "eval_giou": 0.5993068218231201, + "eval_ciou": 0.6605137586593628 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 5.0, + "eval_giou": 0.5851569175720215, + "eval_ciou": 0.6708498597145081 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 6.0, + "eval_giou": 0.5863112211227417, + "eval_ciou": 0.691616415977478 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 7.0, + "eval_giou": 0.5981602668762207, + "eval_ciou": 0.6373696327209473 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 8.0, + "eval_giou": 0.5868176221847534, + "eval_ciou": 0.6186509728431702 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 9.0, + "eval_giou": 0.599888026714325, + "eval_ciou": 0.6218949556350708 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 10.0, + "eval_giou": 0.5966016054153442, + "eval_ciou": 0.6475383639335632 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 11.0, + "eval_giou": 0.599604070186615, + "eval_ciou": 0.6404339075088501 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 12.0, + "eval_giou": 0.6057789325714111, + "eval_ciou": 0.6498401165008545 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 13.0, + "eval_giou": 0.6040271520614624, + "eval_ciou": 0.6116575002670288 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 14.0, + "eval_giou": 0.60584956407547, + "eval_ciou": 0.6290444135665894 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 15.0, + "eval_giou": 0.6188424229621887, + "eval_ciou": 0.6753682494163513 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 16.0, + "eval_giou": 0.6140751242637634, + "eval_ciou": 0.658414900302887 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 17.0, + "eval_giou": 0.6123270988464355, + "eval_ciou": 0.6641766428947449 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 18.0, + "eval_giou": 0.619356095790863, + "eval_ciou": 0.6501895785331726 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 19.0, + "eval_giou": 0.6187554597854614, + "eval_ciou": 0.6563228964805603 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 20.0, + "eval_giou": 0.6174865961074829, + "eval_ciou": 0.6626467108726501 + }, + { + "val_dataset": "ReasonSeg|test", + "epoch": 20.0, + "eval_giou": 0.6232897043228149, + "eval_ciou": 0.6276214718818665 + }, + { + "val_dataset": "refcoco|unc|val", + "epoch": 20.0, + "eval_giou": 0.8062067627906799, + "eval_ciou": 0.810879111289978 + }, + { + "val_dataset": "refcoco|unc|testA", + "epoch": 20.0, + "eval_giou": 0.8242553472518921, + "eval_ciou": 0.8299362659454346 + }, + { + "val_dataset": "refcoco|unc|testB", + "epoch": 20.0, + "eval_giou": 0.7838391661643982, + "eval_ciou": 0.786938488483429 + }, + { + "val_dataset": "refcoco+|unc|val", + "epoch": 20.0, + "eval_giou": 0.7634923458099365, + "eval_ciou": 0.7587481737136841 + }, + { + "val_dataset": "refcoco+|unc|testA", + "epoch": 20.0, + "eval_giou": 0.8028817176818848, + "eval_ciou": 0.8037988543510437 + }, + { + "val_dataset": "refcoco+|unc|testB", + "epoch": 20.0, + "eval_giou": 0.7256011366844177, + "eval_ciou": 0.7161976099014282 + }, + { + "val_dataset": "refcocog|umd|test", + "epoch": 20.0, + "eval_giou": 0.7690672278404236, + "eval_ciou": 0.7782297134399414 + }, + { + "val_dataset": "refcocog|umd|val", + "epoch": 20.0, + "eval_giou": 0.7630250453948975, + "eval_ciou": 0.7679163217544556 + } +] \ No newline at end of file diff --git a/lisa-ivl3-2b_aati_sr/events.out.tfevents.1759004643.bask-pg0308u25a.2921884.0 b/lisa-ivl3-2b_aati_sr/events.out.tfevents.1759004643.bask-pg0308u25a.2921884.0 new file mode 100644 index 0000000000000000000000000000000000000000..6d2ad417d3554f4a6858461baf98aedef4cefb6c --- /dev/null +++ b/lisa-ivl3-2b_aati_sr/events.out.tfevents.1759004643.bask-pg0308u25a.2921884.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6060d551386abeeef58b5d85a5781e838437a3f4f15aae1bb56735d9d87c995 +size 62245 diff --git a/lisa-ivl3-2b_aati_sr/events.out.tfevents.1759097339.bask-pg0308u25a.2052782.0 b/lisa-ivl3-2b_aati_sr/events.out.tfevents.1759097339.bask-pg0308u25a.2052782.0 new file mode 100644 index 0000000000000000000000000000000000000000..956120e4719fa3e742d744278c53fc8cd4d128de --- /dev/null +++ b/lisa-ivl3-2b_aati_sr/events.out.tfevents.1759097339.bask-pg0308u25a.2052782.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff02795f8e9f35bda1236632a6fd2dfb546ccfaf86f99081ac0ce7795a3ab1e6 +size 88 diff --git a/lisa-ivl3-2b_aati_sr/events.out.tfevents.1759097469.bask-pg0308u25a.2060658.0 b/lisa-ivl3-2b_aati_sr/events.out.tfevents.1759097469.bask-pg0308u25a.2060658.0 new file mode 100644 index 0000000000000000000000000000000000000000..cb8e2179615241ff38ec704bbc05ce7b250bff38 --- /dev/null +++ b/lisa-ivl3-2b_aati_sr/events.out.tfevents.1759097469.bask-pg0308u25a.2060658.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dde771411e16dac8b9ec7ecbdd1d1d5df1d90e4abc4928f2147f896093f43774 +size 378412 diff --git a/lisa-ivl3-2b_aati_sr/runs/Sep27_21-24-01_bask-pg0308u25a/events.out.tfevents.1759004710.bask-pg0308u25a.2921884.1 b/lisa-ivl3-2b_aati_sr/runs/Sep27_21-24-01_bask-pg0308u25a/events.out.tfevents.1759004710.bask-pg0308u25a.2921884.1 new file mode 100644 index 0000000000000000000000000000000000000000..77c7c126e3e311f2909c1ceda8270266149a3636 --- /dev/null +++ b/lisa-ivl3-2b_aati_sr/runs/Sep27_21-24-01_bask-pg0308u25a/events.out.tfevents.1759004710.bask-pg0308u25a.2921884.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8b24ce4e45eabeb0c6fc91a2b8bc9dcede990e16d18b17e8744d98c220d93e0 +size 41157 diff --git a/lisa-ivl3-2b_aati_sr/runs/Sep28_23-08-55_bask-pg0308u25a/events.out.tfevents.1759097412.bask-pg0308u25a.2052782.1 b/lisa-ivl3-2b_aati_sr/runs/Sep28_23-08-55_bask-pg0308u25a/events.out.tfevents.1759097412.bask-pg0308u25a.2052782.1 new file mode 100644 index 0000000000000000000000000000000000000000..c5d21d6505c6fae6e40d31ec8d19f8204550a37a --- /dev/null +++ b/lisa-ivl3-2b_aati_sr/runs/Sep28_23-08-55_bask-pg0308u25a/events.out.tfevents.1759097412.bask-pg0308u25a.2052782.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a2802bbb94769223c74bd2a3406fa8febce2e7f72e7795feea4c08b55409e34 +size 9116 diff --git a/lisa-ivl3-2b_aati_sr/runs/Sep28_23-11-05_bask-pg0308u25a/events.out.tfevents.1759097532.bask-pg0308u25a.2060658.1 b/lisa-ivl3-2b_aati_sr/runs/Sep28_23-11-05_bask-pg0308u25a/events.out.tfevents.1759097532.bask-pg0308u25a.2060658.1 new file mode 100644 index 0000000000000000000000000000000000000000..2aa3a782158506016733d9948f2c62f4b360b074 --- /dev/null +++ b/lisa-ivl3-2b_aati_sr/runs/Sep28_23-11-05_bask-pg0308u25a/events.out.tfevents.1759097532.bask-pg0308u25a.2060658.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:236a74e7a10f5744921f0f493a18f2ed79694ab8ee3f9f0567da35a7ae00fe7a +size 201997 diff --git a/lisa-ivl3-2b_aati_sr/runs/Sep28_23-11-05_bask-pg0308u25a/events.out.tfevents.1759137768.bask-pg0308u25a.2060658.2 b/lisa-ivl3-2b_aati_sr/runs/Sep28_23-11-05_bask-pg0308u25a/events.out.tfevents.1759137768.bask-pg0308u25a.2060658.2 new file mode 100644 index 0000000000000000000000000000000000000000..d9f0a95b6007faa6ae0fa00297a33bd0a7e8f97d --- /dev/null +++ b/lisa-ivl3-2b_aati_sr/runs/Sep28_23-11-05_bask-pg0308u25a/events.out.tfevents.1759137768.bask-pg0308u25a.2060658.2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8507b72c7c21b75d4e0b67b9ba7029d67536808b9ddc221aff9db173755b683c +size 1402 diff --git a/lisa-ivl3-2b_aati_sr_bs5acu8e20/ckpt_model/config.json b/lisa-ivl3-2b_aati_sr_bs5acu8e20/ckpt_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1aaa0efe9e975d906488d234e8f6ec309c18056c --- /dev/null +++ b/lisa-ivl3-2b_aati_sr_bs5acu8e20/ckpt_model/config.json @@ -0,0 +1,143 @@ +{ + "architectures": [ + "InternVL3Self" + ], + "auto_map": { + "AutoConfig": "configuration_internvl_chat.InternVLChatConfig", + "AutoModel": "modeling_internvl_chat.InternVLChatModel", + "AutoModelForCausalLM": "modeling_internvl_chat.InternVLChatModel" + }, + "downsample_ratio": 0.5, + "dtype": "bfloat16", + "dynamic_image_size": true, + "eos_token_id": 151645, + "force_image_size": 448, + "hidden_size": 1536, + "image_fold": null, + "llm_config": { + "_attn_implementation_autoset": true, + "_name_or_path": "./pretrained/Qwen2.5-32B-Instruct", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 151643, + "dtype": "bfloat16", + "eos_token_id": 151643, + "hidden_act": "silu", + "hidden_size": 1536, + "initializer_range": 0.02, + "intermediate_size": 8960, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 70, + "model_type": "qwen2", + "moe_config": null, + "num_attention_heads": 12, + "num_hidden_layers": 28, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 2.0, + "rope_type": "dynamic", + "type": "dynamic" + }, + "rope_theta": 1000000.0, + "sliding_window": null, + "use_bfloat16": true, + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 151676 + }, + "max_dynamic_patch": 12, + "min_dynamic_patch": 1, + "model_type": "internvl_chat", + "output_attentions": false, + "pad2square": false, + "pad_token_id": 151643, + "ps_version": "v2", + "select_layer": -1, + "system_message": null, + "template": "internvl2_5", + "tie_word_embeddings": false, + "transformers_version": null, + "use_backbone_lora": 0, + "use_llm_lora": 0, + "use_thumbnail": true, + "vision_config": { + "_attn_implementation_autoset": true, + "_name_or_path": "OpenGVLab/InternViT-6B-448px-V1-5", + "architectures": [ + "InternVisionModel" + ], + "attention_dropout": 0.0, + "auto_map": { + "AutoConfig": "configuration_intern_vit.InternVisionConfig", + "AutoModel": "modeling_intern_vit.InternVisionModel" + }, + "capacity_factor": 1.2, + "drop_path_rate": 0.1, + "dropout": 0.0, + "dtype": "bfloat16", + "eval_capacity_factor": 1.4, + "hidden_act": "gelu", + "hidden_size": 1024, + "image_size": 448, + "initializer_factor": 0.1, + "initializer_range": 1e-10, + "intermediate_size": 4096, + "laux_allreduce": "all_nodes", + "layer_norm_eps": 1e-06, + "model_type": "intern_vit_6b", + "moe_coeff_ratio": 0.5, + "moe_intermediate_size": 768, + "moe_output_scale": 4.0, + "noisy_gate_policy": "RSample_before", + "norm_type": "layer_norm", + "num_attention_heads": 16, + "num_channels": 3, + "num_experts": 8, + "num_hidden_layers": 24, + "num_routed_experts": 4, + "num_shared_experts": 4, + "patch_size": 14, + "qk_normalization": false, + "qkv_bias": true, + "shared_expert_intermediate_size": 3072, + "use_bfloat16": true, + "use_flash_attn": true, + "use_moe": false, + "use_residual": true, + "use_rts": false, + "use_weighted_residual": false + } +} diff --git a/lisa-ivl3-2b_aati_sr_bs5acu8e20/ckpt_model/model.safetensors b/lisa-ivl3-2b_aati_sr_bs5acu8e20/ckpt_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f11341d2f3492e899fa80563da509ff4edd724db --- /dev/null +++ b/lisa-ivl3-2b_aati_sr_bs5acu8e20/ckpt_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d38203e4873ce0d8abfd8a0527229991c202472124463253ad5706c15bfcca80 +size 4211070232 diff --git a/lisa-ivl3-2b_aati_sr_bs5acu8e20/ckpt_model/training_args.bin b/lisa-ivl3-2b_aati_sr_bs5acu8e20/ckpt_model/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..aaf0c18040a0eb9187fcdb19630cdc326148b5cb --- /dev/null +++ b/lisa-ivl3-2b_aati_sr_bs5acu8e20/ckpt_model/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a5858fec803a4d4d42cb4f8643322589d07cf0bbba9e7d456d7b801bbdca6c7 +size 7352 diff --git a/lisa-ivl3-2b_aati_sr_bs5acu8e20/evaluation_metrics.json b/lisa-ivl3-2b_aati_sr_bs5acu8e20/evaluation_metrics.json new file mode 100644 index 0000000000000000000000000000000000000000..2507ffe1637a04703cbb09e3344928d8fc4f453d --- /dev/null +++ b/lisa-ivl3-2b_aati_sr_bs5acu8e20/evaluation_metrics.json @@ -0,0 +1,176 @@ +[ + { + "val_dataset": "ReasonSeg|val", + "epoch": 1.0, + "eval_giou": 0.5831701755523682, + "eval_ciou": 0.6626128554344177 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 2.0, + "eval_giou": 0.5952411890029907, + "eval_ciou": 0.6626877188682556 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 3.0, + "eval_giou": 0.6062884330749512, + "eval_ciou": 0.6667135953903198 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 4.0, + "eval_giou": 0.6196147799491882, + "eval_ciou": 0.6437596678733826 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 5.0, + "eval_giou": 0.6216316223144531, + "eval_ciou": 0.6733407974243164 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 6.0, + "eval_giou": 0.6207277774810791, + "eval_ciou": 0.7004563808441162 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 7.0, + "eval_giou": 0.6256881952285767, + "eval_ciou": 0.681186318397522 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 8.0, + "eval_giou": 0.6138451099395752, + "eval_ciou": 0.6687238812446594 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 9.0, + "eval_giou": 0.6257895231246948, + "eval_ciou": 0.6808822154998779 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 10.0, + "eval_giou": 0.6295360326766968, + "eval_ciou": 0.6649029850959778 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 11.0, + "eval_giou": 0.6255075931549072, + "eval_ciou": 0.672091543674469 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 12.0, + "eval_giou": 0.6274581551551819, + "eval_ciou": 0.6651784181594849 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 13.0, + "eval_giou": 0.6306081414222717, + "eval_ciou": 0.6813814640045166 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 14.0, + "eval_giou": 0.6242629289627075, + "eval_ciou": 0.6436342000961304 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 15.0, + "eval_giou": 0.636084794998169, + "eval_ciou": 0.6750655770301819 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 16.0, + "eval_giou": 0.6268562078475952, + "eval_ciou": 0.6949459314346313 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 17.0, + "eval_giou": 0.6331221461296082, + "eval_ciou": 0.6858609914779663 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 18.0, + "eval_giou": 0.6354182958602905, + "eval_ciou": 0.6718393564224243 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 19.0, + "eval_giou": 0.6372456550598145, + "eval_ciou": 0.6829223036766052 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 20.0, + "eval_giou": 0.635535717010498, + "eval_ciou": 0.6737943291664124 + }, + { + "val_dataset": "ReasonSeg|test", + "epoch": 20.0, + "eval_giou": 0.6199853420257568, + "eval_ciou": 0.6205106973648071 + }, + { + "val_dataset": "refcoco|unc|val", + "epoch": 20.0, + "eval_giou": 0.8225948214530945, + "eval_ciou": 0.8271152377128601 + }, + { + "val_dataset": "refcoco|unc|testA", + "epoch": 20.0, + "eval_giou": 0.8378753066062927, + "eval_ciou": 0.843865156173706 + }, + { + "val_dataset": "refcoco|unc|testB", + "epoch": 20.0, + "eval_giou": 0.8056657910346985, + "eval_ciou": 0.8094556331634521 + }, + { + "val_dataset": "refcoco+|unc|val", + "epoch": 20.0, + "eval_giou": 0.7836799025535583, + "eval_ciou": 0.7731773853302002 + }, + { + "val_dataset": "refcoco+|unc|testA", + "epoch": 20.0, + "eval_giou": 0.8187907338142395, + "eval_ciou": 0.8201593160629272 + }, + { + "val_dataset": "refcoco+|unc|testB", + "epoch": 20.0, + "eval_giou": 0.7530681490898132, + "eval_ciou": 0.741623044013977 + }, + { + "val_dataset": "refcocog|umd|test", + "epoch": 20.0, + "eval_giou": 0.7875200510025024, + "eval_ciou": 0.7955977320671082 + }, + { + "val_dataset": "refcocog|umd|val", + "epoch": 20.0, + "eval_giou": 0.7873671650886536, + "eval_ciou": 0.7938516736030579 + } +] \ No newline at end of file diff --git a/lisa-ivl3-2b_aati_sr_bs5acu8e20/events.out.tfevents.1759318805.bask-pg0309u15a.1769939.0 b/lisa-ivl3-2b_aati_sr_bs5acu8e20/events.out.tfevents.1759318805.bask-pg0309u15a.1769939.0 new file mode 100644 index 0000000000000000000000000000000000000000..efe6f12fd929d357de7b94e60b977d97c5c830b0 --- /dev/null +++ b/lisa-ivl3-2b_aati_sr_bs5acu8e20/events.out.tfevents.1759318805.bask-pg0309u15a.1769939.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67c2af395273417556689a5d08107ef7aa0428fa56a3865c50dd72904187edc3 +size 419822 diff --git a/lisa-ivl3-2b_aati_sr_bs5acu8e20/runs/Oct01_12-39-59_bask-pg0309u15a/events.out.tfevents.1759318879.bask-pg0309u15a.1769939.1 b/lisa-ivl3-2b_aati_sr_bs5acu8e20/runs/Oct01_12-39-59_bask-pg0309u15a/events.out.tfevents.1759318879.bask-pg0309u15a.1769939.1 new file mode 100644 index 0000000000000000000000000000000000000000..ff52ba0f54bc22f97d80279d313b0d1b5a914129 --- /dev/null +++ b/lisa-ivl3-2b_aati_sr_bs5acu8e20/runs/Oct01_12-39-59_bask-pg0309u15a/events.out.tfevents.1759318879.bask-pg0309u15a.1769939.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0fce42c53d94518d177027c274c37c2cda048ea86b389c783cfaab5bbca82f90 +size 223363 diff --git a/lisa-ivl3-2b_aati_sr_bs5acu8e20/runs/Oct01_12-39-59_bask-pg0309u15a/events.out.tfevents.1759401820.bask-pg0309u15a.1769939.2 b/lisa-ivl3-2b_aati_sr_bs5acu8e20/runs/Oct01_12-39-59_bask-pg0309u15a/events.out.tfevents.1759401820.bask-pg0309u15a.1769939.2 new file mode 100644 index 0000000000000000000000000000000000000000..18fccbb04c1f06d7256ff245ca87c9aa24b72175 --- /dev/null +++ b/lisa-ivl3-2b_aati_sr_bs5acu8e20/runs/Oct01_12-39-59_bask-pg0309u15a/events.out.tfevents.1759401820.bask-pg0309u15a.1769939.2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:181b60d216f4848288f94bcee5fe5a3911b349dbdb683b7bff42595701bca3c8 +size 1402 diff --git a/lisa-ivl3-2b_nr2_vlorati_sr/ckpt_model/config.json b/lisa-ivl3-2b_nr2_vlorati_sr/ckpt_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1aaa0efe9e975d906488d234e8f6ec309c18056c --- /dev/null +++ b/lisa-ivl3-2b_nr2_vlorati_sr/ckpt_model/config.json @@ -0,0 +1,143 @@ +{ + "architectures": [ + "InternVL3Self" + ], + "auto_map": { + "AutoConfig": "configuration_internvl_chat.InternVLChatConfig", + "AutoModel": "modeling_internvl_chat.InternVLChatModel", + "AutoModelForCausalLM": "modeling_internvl_chat.InternVLChatModel" + }, + "downsample_ratio": 0.5, + "dtype": "bfloat16", + "dynamic_image_size": true, + "eos_token_id": 151645, + "force_image_size": 448, + "hidden_size": 1536, + "image_fold": null, + "llm_config": { + "_attn_implementation_autoset": true, + "_name_or_path": "./pretrained/Qwen2.5-32B-Instruct", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 151643, + "dtype": "bfloat16", + "eos_token_id": 151643, + "hidden_act": "silu", + "hidden_size": 1536, + "initializer_range": 0.02, + "intermediate_size": 8960, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 70, + "model_type": "qwen2", + "moe_config": null, + "num_attention_heads": 12, + "num_hidden_layers": 28, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 2.0, + "rope_type": "dynamic", + "type": "dynamic" + }, + "rope_theta": 1000000.0, + "sliding_window": null, + "use_bfloat16": true, + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 151676 + }, + "max_dynamic_patch": 12, + "min_dynamic_patch": 1, + "model_type": "internvl_chat", + "output_attentions": false, + "pad2square": false, + "pad_token_id": 151643, + "ps_version": "v2", + "select_layer": -1, + "system_message": null, + "template": "internvl2_5", + "tie_word_embeddings": false, + "transformers_version": null, + "use_backbone_lora": 0, + "use_llm_lora": 0, + "use_thumbnail": true, + "vision_config": { + "_attn_implementation_autoset": true, + "_name_or_path": "OpenGVLab/InternViT-6B-448px-V1-5", + "architectures": [ + "InternVisionModel" + ], + "attention_dropout": 0.0, + "auto_map": { + "AutoConfig": "configuration_intern_vit.InternVisionConfig", + "AutoModel": "modeling_intern_vit.InternVisionModel" + }, + "capacity_factor": 1.2, + "drop_path_rate": 0.1, + "dropout": 0.0, + "dtype": "bfloat16", + "eval_capacity_factor": 1.4, + "hidden_act": "gelu", + "hidden_size": 1024, + "image_size": 448, + "initializer_factor": 0.1, + "initializer_range": 1e-10, + "intermediate_size": 4096, + "laux_allreduce": "all_nodes", + "layer_norm_eps": 1e-06, + "model_type": "intern_vit_6b", + "moe_coeff_ratio": 0.5, + "moe_intermediate_size": 768, + "moe_output_scale": 4.0, + "noisy_gate_policy": "RSample_before", + "norm_type": "layer_norm", + "num_attention_heads": 16, + "num_channels": 3, + "num_experts": 8, + "num_hidden_layers": 24, + "num_routed_experts": 4, + "num_shared_experts": 4, + "patch_size": 14, + "qk_normalization": false, + "qkv_bias": true, + "shared_expert_intermediate_size": 3072, + "use_bfloat16": true, + "use_flash_attn": true, + "use_moe": false, + "use_residual": true, + "use_rts": false, + "use_weighted_residual": false + } +} diff --git a/lisa-ivl3-2b_nr2_vlorati_sr/ckpt_model/model.safetensors b/lisa-ivl3-2b_nr2_vlorati_sr/ckpt_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b0c8f5e565025fa48875cf5aa3c551ebdde93e5a --- /dev/null +++ b/lisa-ivl3-2b_nr2_vlorati_sr/ckpt_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:173587561539e9ca8afd26179c60ac88288d2251ffbcdfc852b311549c141bf8 +size 4244119544 diff --git a/lisa-ivl3-2b_nr2_vlorati_sr/ckpt_model/training_args.bin b/lisa-ivl3-2b_nr2_vlorati_sr/ckpt_model/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..ebeadd660b02f6fa57e832c7ce4a0a5e455e864d --- /dev/null +++ b/lisa-ivl3-2b_nr2_vlorati_sr/ckpt_model/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be8234d7498e224822badd158dcf7a4e5ff9f0b47390f78268aca5df600092ab +size 7352 diff --git a/lisa-ivl3-2b_nr2_vlorati_sr/evaluation_metrics.json b/lisa-ivl3-2b_nr2_vlorati_sr/evaluation_metrics.json new file mode 100644 index 0000000000000000000000000000000000000000..ffd4639621612f6b6d8e49f76458401f718d06aa --- /dev/null +++ b/lisa-ivl3-2b_nr2_vlorati_sr/evaluation_metrics.json @@ -0,0 +1,116 @@ +[ + { + "val_dataset": "ReasonSeg|val", + "epoch": 1.0, + "eval_giou": 0.49992606043815613, + "eval_ciou": 0.6110827922821045 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 2.0, + "eval_giou": 0.5632675290107727, + "eval_ciou": 0.623365581035614 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 3.0, + "eval_giou": 0.5778806805610657, + "eval_ciou": 0.6206309199333191 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 4.0, + "eval_giou": 0.5851303935050964, + "eval_ciou": 0.6020804643630981 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 5.0, + "eval_giou": 0.5820455551147461, + "eval_ciou": 0.6651975512504578 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 6.0, + "eval_giou": 0.6028497219085693, + "eval_ciou": 0.6962510943412781 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 7.0, + "eval_giou": 0.5977049469947815, + "eval_ciou": 0.6804401874542236 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 8.0, + "eval_giou": 0.6007415652275085, + "eval_ciou": 0.6796437501907349 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 9.0, + "eval_giou": 0.611798882484436, + "eval_ciou": 0.6809463500976562 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 10.0, + "eval_giou": 0.6134523153305054, + "eval_ciou": 0.6816759705543518 + }, + { + "val_dataset": "ReasonSeg|test", + "epoch": 10.0, + "eval_giou": 0.6059213876724243, + "eval_ciou": 0.6428766846656799 + }, + { + "val_dataset": "refcoco|unc|val", + "epoch": 10.0, + "eval_giou": 0.7799202799797058, + "eval_ciou": 0.786268413066864 + }, + { + "val_dataset": "refcoco|unc|testA", + "epoch": 10.0, + "eval_giou": 0.8031054139137268, + "eval_ciou": 0.8086256980895996 + }, + { + "val_dataset": "refcoco|unc|testB", + "epoch": 10.0, + "eval_giou": 0.7538403272628784, + "eval_ciou": 0.7585700750350952 + }, + { + "val_dataset": "refcoco+|unc|val", + "epoch": 10.0, + "eval_giou": 0.7317773699760437, + "eval_ciou": 0.7287389039993286 + }, + { + "val_dataset": "refcoco+|unc|testA", + "epoch": 10.0, + "eval_giou": 0.7756525278091431, + "eval_ciou": 0.7790922522544861 + }, + { + "val_dataset": "refcoco+|unc|testB", + "epoch": 10.0, + "eval_giou": 0.6839732527732849, + "eval_ciou": 0.6755383610725403 + }, + { + "val_dataset": "refcocog|umd|test", + "epoch": 10.0, + "eval_giou": 0.7441026568412781, + "eval_ciou": 0.7565898895263672 + }, + { + "val_dataset": "refcocog|umd|val", + "epoch": 10.0, + "eval_giou": 0.7429221272468567, + "eval_ciou": 0.7514289021492004 + } +] \ No newline at end of file diff --git a/lisa-ivl3-2b_nr2_vlorati_sr/events.out.tfevents.1759275794.bask-pg0309u06a.3124946.0 b/lisa-ivl3-2b_nr2_vlorati_sr/events.out.tfevents.1759275794.bask-pg0309u06a.3124946.0 new file mode 100644 index 0000000000000000000000000000000000000000..8788bed58ddbca7d29f145303ea3ea6c56278179 --- /dev/null +++ b/lisa-ivl3-2b_nr2_vlorati_sr/events.out.tfevents.1759275794.bask-pg0309u06a.3124946.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a626d6013165f7d36b0d694d7cb70efc26aa7293b04f2b4033c9f6aaa49e6c89 +size 212352 diff --git a/lisa-ivl3-2b_nr2_vlorati_sr/runs/Oct01_00-43-11_bask-pg0309u06a/events.out.tfevents.1759275884.bask-pg0309u06a.3124946.1 b/lisa-ivl3-2b_nr2_vlorati_sr/runs/Oct01_00-43-11_bask-pg0309u06a/events.out.tfevents.1759275884.bask-pg0309u06a.3124946.1 new file mode 100644 index 0000000000000000000000000000000000000000..2ee736f6e3dd7800a9bec4ca3ed324d5b2b8f5f2 --- /dev/null +++ b/lisa-ivl3-2b_nr2_vlorati_sr/runs/Oct01_00-43-11_bask-pg0309u06a/events.out.tfevents.1759275884.bask-pg0309u06a.3124946.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fca84167c42051785d90a2d5cfe045d729b673057867500e41f40bee333d9175 +size 116399 diff --git a/lisa-ivl3-2b_nr2_vlorati_sr/runs/Oct01_00-43-11_bask-pg0309u06a/events.out.tfevents.1759322723.bask-pg0309u06a.3124946.2 b/lisa-ivl3-2b_nr2_vlorati_sr/runs/Oct01_00-43-11_bask-pg0309u06a/events.out.tfevents.1759322723.bask-pg0309u06a.3124946.2 new file mode 100644 index 0000000000000000000000000000000000000000..f884f1758b930f8bb2a7fd9e96a09eaa1299329d --- /dev/null +++ b/lisa-ivl3-2b_nr2_vlorati_sr/runs/Oct01_00-43-11_bask-pg0309u06a/events.out.tfevents.1759322723.bask-pg0309u06a.3124946.2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28be3c60b606cd71d8db4d463b62c2df3c4bd394ae7c4e8ac23f0f56f371b33a +size 1402 diff --git a/lisa-ivl3-2b_nr3_122_2_vlorati_sr/ckpt_model/config.json b/lisa-ivl3-2b_nr3_122_2_vlorati_sr/ckpt_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1aaa0efe9e975d906488d234e8f6ec309c18056c --- /dev/null +++ b/lisa-ivl3-2b_nr3_122_2_vlorati_sr/ckpt_model/config.json @@ -0,0 +1,143 @@ +{ + "architectures": [ + "InternVL3Self" + ], + "auto_map": { + "AutoConfig": "configuration_internvl_chat.InternVLChatConfig", + "AutoModel": "modeling_internvl_chat.InternVLChatModel", + "AutoModelForCausalLM": "modeling_internvl_chat.InternVLChatModel" + }, + "downsample_ratio": 0.5, + "dtype": "bfloat16", + "dynamic_image_size": true, + "eos_token_id": 151645, + "force_image_size": 448, + "hidden_size": 1536, + "image_fold": null, + "llm_config": { + "_attn_implementation_autoset": true, + "_name_or_path": "./pretrained/Qwen2.5-32B-Instruct", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 151643, + "dtype": "bfloat16", + "eos_token_id": 151643, + "hidden_act": "silu", + "hidden_size": 1536, + "initializer_range": 0.02, + "intermediate_size": 8960, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 70, + "model_type": "qwen2", + "moe_config": null, + "num_attention_heads": 12, + "num_hidden_layers": 28, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 2.0, + "rope_type": "dynamic", + "type": "dynamic" + }, + "rope_theta": 1000000.0, + "sliding_window": null, + "use_bfloat16": true, + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 151676 + }, + "max_dynamic_patch": 12, + "min_dynamic_patch": 1, + "model_type": "internvl_chat", + "output_attentions": false, + "pad2square": false, + "pad_token_id": 151643, + "ps_version": "v2", + "select_layer": -1, + "system_message": null, + "template": "internvl2_5", + "tie_word_embeddings": false, + "transformers_version": null, + "use_backbone_lora": 0, + "use_llm_lora": 0, + "use_thumbnail": true, + "vision_config": { + "_attn_implementation_autoset": true, + "_name_or_path": "OpenGVLab/InternViT-6B-448px-V1-5", + "architectures": [ + "InternVisionModel" + ], + "attention_dropout": 0.0, + "auto_map": { + "AutoConfig": "configuration_intern_vit.InternVisionConfig", + "AutoModel": "modeling_intern_vit.InternVisionModel" + }, + "capacity_factor": 1.2, + "drop_path_rate": 0.1, + "dropout": 0.0, + "dtype": "bfloat16", + "eval_capacity_factor": 1.4, + "hidden_act": "gelu", + "hidden_size": 1024, + "image_size": 448, + "initializer_factor": 0.1, + "initializer_range": 1e-10, + "intermediate_size": 4096, + "laux_allreduce": "all_nodes", + "layer_norm_eps": 1e-06, + "model_type": "intern_vit_6b", + "moe_coeff_ratio": 0.5, + "moe_intermediate_size": 768, + "moe_output_scale": 4.0, + "noisy_gate_policy": "RSample_before", + "norm_type": "layer_norm", + "num_attention_heads": 16, + "num_channels": 3, + "num_experts": 8, + "num_hidden_layers": 24, + "num_routed_experts": 4, + "num_shared_experts": 4, + "patch_size": 14, + "qk_normalization": false, + "qkv_bias": true, + "shared_expert_intermediate_size": 3072, + "use_bfloat16": true, + "use_flash_attn": true, + "use_moe": false, + "use_residual": true, + "use_rts": false, + "use_weighted_residual": false + } +} diff --git a/lisa-ivl3-2b_nr3_122_2_vlorati_sr/ckpt_model/model.safetensors b/lisa-ivl3-2b_nr3_122_2_vlorati_sr/ckpt_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6550f8f9edbd6422a5d2c94c6cf8b0f2b8062e3c --- /dev/null +++ b/lisa-ivl3-2b_nr3_122_2_vlorati_sr/ckpt_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f4a808fc1338f523d39870fc6f62d3d0c3f65a38db9f6aba9ff19db3dbb7f81 +size 4244119544 diff --git a/lisa-ivl3-2b_nr3_122_2_vlorati_sr/ckpt_model/training_args.bin b/lisa-ivl3-2b_nr3_122_2_vlorati_sr/ckpt_model/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..8af761d533d2a1a4cf2c9f76b350dee6bc3319b0 --- /dev/null +++ b/lisa-ivl3-2b_nr3_122_2_vlorati_sr/ckpt_model/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:379c086e43080e14b752c22cfffeb2e07a07c0bd6ea16efb629f93d6cf26012c +size 7352 diff --git a/lisa-ivl3-2b_nr3_122_2_vlorati_sr/evaluation_metrics.json b/lisa-ivl3-2b_nr3_122_2_vlorati_sr/evaluation_metrics.json new file mode 100644 index 0000000000000000000000000000000000000000..879cc19aae40a49ca53e3876f6f6f7d33f81f730 --- /dev/null +++ b/lisa-ivl3-2b_nr3_122_2_vlorati_sr/evaluation_metrics.json @@ -0,0 +1,116 @@ +[ + { + "val_dataset": "ReasonSeg|val", + "epoch": 1.0, + "eval_giou": 0.529840350151062, + "eval_ciou": 0.61083984375 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 2.0, + "eval_giou": 0.5495224595069885, + "eval_ciou": 0.6280785202980042 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 3.0, + "eval_giou": 0.5777504444122314, + "eval_ciou": 0.6374984979629517 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 4.0, + "eval_giou": 0.577897846698761, + "eval_ciou": 0.6562594771385193 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 5.0, + "eval_giou": 0.5785743594169617, + "eval_ciou": 0.6527135968208313 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 6.0, + "eval_giou": 0.5933905243873596, + "eval_ciou": 0.6383258700370789 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 7.0, + "eval_giou": 0.6087335348129272, + "eval_ciou": 0.6717571020126343 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 8.0, + "eval_giou": 0.6045055985450745, + "eval_ciou": 0.6598408818244934 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 9.0, + "eval_giou": 0.6039425730705261, + "eval_ciou": 0.6512514352798462 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 10.0, + "eval_giou": 0.6099196076393127, + "eval_ciou": 0.6599507927894592 + }, + { + "val_dataset": "ReasonSeg|test", + "epoch": 10.0, + "eval_giou": 0.5983391404151917, + "eval_ciou": 0.6378564238548279 + }, + { + "val_dataset": "refcoco|unc|val", + "epoch": 10.0, + "eval_giou": 0.7846658825874329, + "eval_ciou": 0.7908703684806824 + }, + { + "val_dataset": "refcoco|unc|testA", + "epoch": 10.0, + "eval_giou": 0.8061053156852722, + "eval_ciou": 0.8121806979179382 + }, + { + "val_dataset": "refcoco|unc|testB", + "epoch": 10.0, + "eval_giou": 0.7557012438774109, + "eval_ciou": 0.7612731456756592 + }, + { + "val_dataset": "refcoco+|unc|val", + "epoch": 10.0, + "eval_giou": 0.7339252233505249, + "eval_ciou": 0.7308076620101929 + }, + { + "val_dataset": "refcoco+|unc|testA", + "epoch": 10.0, + "eval_giou": 0.7778381109237671, + "eval_ciou": 0.7786577343940735 + }, + { + "val_dataset": "refcoco+|unc|testB", + "epoch": 10.0, + "eval_giou": 0.6824975609779358, + "eval_ciou": 0.6733501553535461 + }, + { + "val_dataset": "refcocog|umd|test", + "epoch": 10.0, + "eval_giou": 0.7487913370132446, + "eval_ciou": 0.7593491077423096 + }, + { + "val_dataset": "refcocog|umd|val", + "epoch": 10.0, + "eval_giou": 0.741722047328949, + "eval_ciou": 0.7474746108055115 + } +] \ No newline at end of file diff --git a/lisa-ivl3-2b_nr3_122_2_vlorati_sr/events.out.tfevents.1759337142.bask-pg0308u25a.3571287.0 b/lisa-ivl3-2b_nr3_122_2_vlorati_sr/events.out.tfevents.1759337142.bask-pg0308u25a.3571287.0 new file mode 100644 index 0000000000000000000000000000000000000000..a06cf226fc91c36a7c4feeedab7c36624ffcf618 --- /dev/null +++ b/lisa-ivl3-2b_nr3_122_2_vlorati_sr/events.out.tfevents.1759337142.bask-pg0308u25a.3571287.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4168921ea1ec0605b7c338bf6d503272031b323ecd4d82df1611cc32a60dffff +size 88 diff --git a/lisa-ivl3-2b_nr3_122_2_vlorati_sr/events.out.tfevents.1759337476.bask-pg0308u25a.3578336.0 b/lisa-ivl3-2b_nr3_122_2_vlorati_sr/events.out.tfevents.1759337476.bask-pg0308u25a.3578336.0 new file mode 100644 index 0000000000000000000000000000000000000000..1eb9175a20a1d4ee0919b2f2e66c7b7a77ada154 --- /dev/null +++ b/lisa-ivl3-2b_nr3_122_2_vlorati_sr/events.out.tfevents.1759337476.bask-pg0308u25a.3578336.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb4061204dccb97c29c5016a06fb6b8106538e983d4706a9d8cfaf792ab96b22 +size 88 diff --git a/lisa-ivl3-2b_nr3_122_2_vlorati_sr/events.out.tfevents.1759337801.bask-pg0308u25a.3585526.0 b/lisa-ivl3-2b_nr3_122_2_vlorati_sr/events.out.tfevents.1759337801.bask-pg0308u25a.3585526.0 new file mode 100644 index 0000000000000000000000000000000000000000..06b812afb7932d7c4c7d48175f1455721c686065 --- /dev/null +++ b/lisa-ivl3-2b_nr3_122_2_vlorati_sr/events.out.tfevents.1759337801.bask-pg0308u25a.3585526.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c17b438e249a33aa0523b19d071b841150d521f4866f6a455593a32537716925 +size 212352 diff --git a/lisa-ivl3-2b_nr3_122_2_vlorati_sr/runs/Oct01_17-45-37_bask-pg0308u25a/events.out.tfevents.1759337227.bask-pg0308u25a.3571287.1 b/lisa-ivl3-2b_nr3_122_2_vlorati_sr/runs/Oct01_17-45-37_bask-pg0308u25a/events.out.tfevents.1759337227.bask-pg0308u25a.3571287.1 new file mode 100644 index 0000000000000000000000000000000000000000..7a9f0182e8d129933322665b0b0725c9b3566765 --- /dev/null +++ b/lisa-ivl3-2b_nr3_122_2_vlorati_sr/runs/Oct01_17-45-37_bask-pg0308u25a/events.out.tfevents.1759337227.bask-pg0308u25a.3571287.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04d1081dc5085eec9aa9a354881fc3242aef9ef28c256ec99da1afaf8dbc23db +size 9142 diff --git a/lisa-ivl3-2b_nr3_122_2_vlorati_sr/runs/Oct01_17-51-12_bask-pg0308u25a/events.out.tfevents.1759337560.bask-pg0308u25a.3578336.1 b/lisa-ivl3-2b_nr3_122_2_vlorati_sr/runs/Oct01_17-51-12_bask-pg0308u25a/events.out.tfevents.1759337560.bask-pg0308u25a.3578336.1 new file mode 100644 index 0000000000000000000000000000000000000000..d3e4dc8e9fead2ff004a2a8a6f639c88a77a82e1 --- /dev/null +++ b/lisa-ivl3-2b_nr3_122_2_vlorati_sr/runs/Oct01_17-51-12_bask-pg0308u25a/events.out.tfevents.1759337560.bask-pg0308u25a.3578336.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b8b67e67b8dea507d2af6b314c185d004a4f6812d77e6466ac9798e3dad32b3 +size 9142 diff --git a/lisa-ivl3-2b_nr3_122_2_vlorati_sr/runs/Oct01_17-56-37_bask-pg0308u25a/events.out.tfevents.1759337887.bask-pg0308u25a.3585526.1 b/lisa-ivl3-2b_nr3_122_2_vlorati_sr/runs/Oct01_17-56-37_bask-pg0308u25a/events.out.tfevents.1759337887.bask-pg0308u25a.3585526.1 new file mode 100644 index 0000000000000000000000000000000000000000..ced1edeb3428f1f0bc4b1d307f82a9ff35038936 --- /dev/null +++ b/lisa-ivl3-2b_nr3_122_2_vlorati_sr/runs/Oct01_17-56-37_bask-pg0308u25a/events.out.tfevents.1759337887.bask-pg0308u25a.3585526.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97e63030a506caa5bfdc317829ff84b1ddab6909836eaafeac7140ef1a7d1553 +size 116408 diff --git a/lisa-ivl3-2b_nr3_122_2_vlorati_sr/runs/Oct01_17-56-37_bask-pg0308u25a/events.out.tfevents.1759363414.bask-pg0308u25a.3585526.2 b/lisa-ivl3-2b_nr3_122_2_vlorati_sr/runs/Oct01_17-56-37_bask-pg0308u25a/events.out.tfevents.1759363414.bask-pg0308u25a.3585526.2 new file mode 100644 index 0000000000000000000000000000000000000000..3d9a7eb07e287edb36df8c35bb48bd3369d38647 --- /dev/null +++ b/lisa-ivl3-2b_nr3_122_2_vlorati_sr/runs/Oct01_17-56-37_bask-pg0308u25a/events.out.tfevents.1759363414.bask-pg0308u25a.3585526.2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39734374fddcd4e3ccd2b2229fe29d71a729ae3759c1b098cb295dc411c47503 +size 1402 diff --git a/lisa-ivl3-2b_nr3_122_vlorati_sr/ckpt_model/config.json b/lisa-ivl3-2b_nr3_122_vlorati_sr/ckpt_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1aaa0efe9e975d906488d234e8f6ec309c18056c --- /dev/null +++ b/lisa-ivl3-2b_nr3_122_vlorati_sr/ckpt_model/config.json @@ -0,0 +1,143 @@ +{ + "architectures": [ + "InternVL3Self" + ], + "auto_map": { + "AutoConfig": "configuration_internvl_chat.InternVLChatConfig", + "AutoModel": "modeling_internvl_chat.InternVLChatModel", + "AutoModelForCausalLM": "modeling_internvl_chat.InternVLChatModel" + }, + "downsample_ratio": 0.5, + "dtype": "bfloat16", + "dynamic_image_size": true, + "eos_token_id": 151645, + "force_image_size": 448, + "hidden_size": 1536, + "image_fold": null, + "llm_config": { + "_attn_implementation_autoset": true, + "_name_or_path": "./pretrained/Qwen2.5-32B-Instruct", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 151643, + "dtype": "bfloat16", + "eos_token_id": 151643, + "hidden_act": "silu", + "hidden_size": 1536, + "initializer_range": 0.02, + "intermediate_size": 8960, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 70, + "model_type": "qwen2", + "moe_config": null, + "num_attention_heads": 12, + "num_hidden_layers": 28, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 2.0, + "rope_type": "dynamic", + "type": "dynamic" + }, + "rope_theta": 1000000.0, + "sliding_window": null, + "use_bfloat16": true, + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 151676 + }, + "max_dynamic_patch": 12, + "min_dynamic_patch": 1, + "model_type": "internvl_chat", + "output_attentions": false, + "pad2square": false, + "pad_token_id": 151643, + "ps_version": "v2", + "select_layer": -1, + "system_message": null, + "template": "internvl2_5", + "tie_word_embeddings": false, + "transformers_version": null, + "use_backbone_lora": 0, + "use_llm_lora": 0, + "use_thumbnail": true, + "vision_config": { + "_attn_implementation_autoset": true, + "_name_or_path": "OpenGVLab/InternViT-6B-448px-V1-5", + "architectures": [ + "InternVisionModel" + ], + "attention_dropout": 0.0, + "auto_map": { + "AutoConfig": "configuration_intern_vit.InternVisionConfig", + "AutoModel": "modeling_intern_vit.InternVisionModel" + }, + "capacity_factor": 1.2, + "drop_path_rate": 0.1, + "dropout": 0.0, + "dtype": "bfloat16", + "eval_capacity_factor": 1.4, + "hidden_act": "gelu", + "hidden_size": 1024, + "image_size": 448, + "initializer_factor": 0.1, + "initializer_range": 1e-10, + "intermediate_size": 4096, + "laux_allreduce": "all_nodes", + "layer_norm_eps": 1e-06, + "model_type": "intern_vit_6b", + "moe_coeff_ratio": 0.5, + "moe_intermediate_size": 768, + "moe_output_scale": 4.0, + "noisy_gate_policy": "RSample_before", + "norm_type": "layer_norm", + "num_attention_heads": 16, + "num_channels": 3, + "num_experts": 8, + "num_hidden_layers": 24, + "num_routed_experts": 4, + "num_shared_experts": 4, + "patch_size": 14, + "qk_normalization": false, + "qkv_bias": true, + "shared_expert_intermediate_size": 3072, + "use_bfloat16": true, + "use_flash_attn": true, + "use_moe": false, + "use_residual": true, + "use_rts": false, + "use_weighted_residual": false + } +} diff --git a/lisa-ivl3-2b_nr3_122_vlorati_sr/ckpt_model/model.safetensors b/lisa-ivl3-2b_nr3_122_vlorati_sr/ckpt_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8055c84abdb20c9374c650efaa27130e6a4d7561 --- /dev/null +++ b/lisa-ivl3-2b_nr3_122_vlorati_sr/ckpt_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e765f787a4b4b90a44c541c060bb60a6be32e0be5cf6019395536ef4edefc8fb +size 4234675816 diff --git a/lisa-ivl3-2b_nr3_122_vlorati_sr/ckpt_model/training_args.bin b/lisa-ivl3-2b_nr3_122_vlorati_sr/ckpt_model/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..306db2a8dbfa7e2539ea6c6c067d384d7bccb07c --- /dev/null +++ b/lisa-ivl3-2b_nr3_122_vlorati_sr/ckpt_model/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f76ac58c2275447d9bcb0a727e23e4a780f4b7215404a84c15dbfdfb1231188 +size 7352 diff --git a/lisa-ivl3-2b_nr3_122_vlorati_sr/evaluation_metrics.json b/lisa-ivl3-2b_nr3_122_vlorati_sr/evaluation_metrics.json new file mode 100644 index 0000000000000000000000000000000000000000..1912e1fda2336e19c5a7592541f4aa29c0c6ed18 --- /dev/null +++ b/lisa-ivl3-2b_nr3_122_vlorati_sr/evaluation_metrics.json @@ -0,0 +1,116 @@ +[ + { + "val_dataset": "ReasonSeg|val", + "epoch": 1.0, + "eval_giou": 0.5373343229293823, + "eval_ciou": 0.6180018782615662 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 2.0, + "eval_giou": 0.5649374127388, + "eval_ciou": 0.636763334274292 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 3.0, + "eval_giou": 0.5869829654693604, + "eval_ciou": 0.7015414834022522 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 4.0, + "eval_giou": 0.5917444825172424, + "eval_ciou": 0.7137655019760132 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 5.0, + "eval_giou": 0.5996885895729065, + "eval_ciou": 0.7088227868080139 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 6.0, + "eval_giou": 0.6110551953315735, + "eval_ciou": 0.6965492963790894 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 7.0, + "eval_giou": 0.6078798174858093, + "eval_ciou": 0.718289852142334 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 8.0, + "eval_giou": 0.6149584054946899, + "eval_ciou": 0.6968558430671692 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 9.0, + "eval_giou": 0.6170741319656372, + "eval_ciou": 0.7212521433830261 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 10.0, + "eval_giou": 0.6232219934463501, + "eval_ciou": 0.7210202217102051 + }, + { + "val_dataset": "ReasonSeg|test", + "epoch": 10.0, + "eval_giou": 0.596360981464386, + "eval_ciou": 0.6341654062271118 + }, + { + "val_dataset": "refcoco|unc|val", + "epoch": 10.0, + "eval_giou": 0.7841194868087769, + "eval_ciou": 0.7900864481925964 + }, + { + "val_dataset": "refcoco|unc|testA", + "epoch": 10.0, + "eval_giou": 0.8032773733139038, + "eval_ciou": 0.8108689188957214 + }, + { + "val_dataset": "refcoco|unc|testB", + "epoch": 10.0, + "eval_giou": 0.7510504722595215, + "eval_ciou": 0.7533969879150391 + }, + { + "val_dataset": "refcoco+|unc|val", + "epoch": 10.0, + "eval_giou": 0.7322676181793213, + "eval_ciou": 0.727592408657074 + }, + { + "val_dataset": "refcoco+|unc|testA", + "epoch": 10.0, + "eval_giou": 0.7752026915550232, + "eval_ciou": 0.7760695219039917 + }, + { + "val_dataset": "refcoco+|unc|testB", + "epoch": 10.0, + "eval_giou": 0.6822892427444458, + "eval_ciou": 0.67359459400177 + }, + { + "val_dataset": "refcocog|umd|test", + "epoch": 10.0, + "eval_giou": 0.7448171973228455, + "eval_ciou": 0.7525338530540466 + }, + { + "val_dataset": "refcocog|umd|val", + "epoch": 10.0, + "eval_giou": 0.7451841235160828, + "eval_ciou": 0.7531925439834595 + } +] \ No newline at end of file diff --git a/lisa-ivl3-2b_nr3_122_vlorati_sr/events.out.tfevents.1759309189.bask-pg0308u29a.2492715.0 b/lisa-ivl3-2b_nr3_122_vlorati_sr/events.out.tfevents.1759309189.bask-pg0308u29a.2492715.0 new file mode 100644 index 0000000000000000000000000000000000000000..649edd69161912fe68e7c4628e76f603e0808e0e --- /dev/null +++ b/lisa-ivl3-2b_nr3_122_vlorati_sr/events.out.tfevents.1759309189.bask-pg0308u29a.2492715.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:742dec7da1d819b4b47b6b557d7c0265011f67a5b2fb8837f2e59ea2e2f5c5b3 +size 88 diff --git a/lisa-ivl3-2b_nr3_122_vlorati_sr/events.out.tfevents.1759309262.bask-pg0308u29a.2496177.0 b/lisa-ivl3-2b_nr3_122_vlorati_sr/events.out.tfevents.1759309262.bask-pg0308u29a.2496177.0 new file mode 100644 index 0000000000000000000000000000000000000000..9a9b12439310fff0ff42ab3a4400cc17d31643cc --- /dev/null +++ b/lisa-ivl3-2b_nr3_122_vlorati_sr/events.out.tfevents.1759309262.bask-pg0308u29a.2496177.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bad55cd273aecdd44863bd2d637e019c83e3779f07c25f3f53358f367865772f +size 88 diff --git a/lisa-ivl3-2b_nr3_122_vlorati_sr/events.out.tfevents.1759311911.bask-pg0309u17a.1541936.0 b/lisa-ivl3-2b_nr3_122_vlorati_sr/events.out.tfevents.1759311911.bask-pg0309u17a.1541936.0 new file mode 100644 index 0000000000000000000000000000000000000000..6e6b78cb52aaf87feb1dad70ad2ebde91d2a692b --- /dev/null +++ b/lisa-ivl3-2b_nr3_122_vlorati_sr/events.out.tfevents.1759311911.bask-pg0309u17a.1541936.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a742d242da1bf6e55fb715070e31677e84e0855f5459a14de10a88671a0f5c1 +size 20254 diff --git a/lisa-ivl3-2b_nr3_122_vlorati_sr/events.out.tfevents.1759316853.bask-pg0308u29a.2749490.0 b/lisa-ivl3-2b_nr3_122_vlorati_sr/events.out.tfevents.1759316853.bask-pg0308u29a.2749490.0 new file mode 100644 index 0000000000000000000000000000000000000000..cf04dfa3ca433518df5cf7f00aef1257c7550404 --- /dev/null +++ b/lisa-ivl3-2b_nr3_122_vlorati_sr/events.out.tfevents.1759316853.bask-pg0308u29a.2749490.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:353490de2da2f12dc4543711fd438d0c0576385be49cc89044881c4d6ef3c95c +size 212352 diff --git a/lisa-ivl3-2b_nr3_122_vlorati_sr/runs/Oct01_10-00-58_bask-pg0308u29a/events.out.tfevents.1759309346.bask-pg0308u29a.2496177.1 b/lisa-ivl3-2b_nr3_122_vlorati_sr/runs/Oct01_10-00-58_bask-pg0308u29a/events.out.tfevents.1759309346.bask-pg0308u29a.2496177.1 new file mode 100644 index 0000000000000000000000000000000000000000..cdbae7e92dd6f0fcd237db4cc014c15a0809019d --- /dev/null +++ b/lisa-ivl3-2b_nr3_122_vlorati_sr/runs/Oct01_10-00-58_bask-pg0308u29a/events.out.tfevents.1759309346.bask-pg0308u29a.2496177.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92f546cc85bad624c669343e08cdf1dcb067a9e1210a57b6da1d6e99d4266a7d +size 9141 diff --git a/lisa-ivl3-2b_nr3_122_vlorati_sr/runs/Oct01_10-45-08_bask-pg0309u17a/events.out.tfevents.1759311982.bask-pg0309u17a.1541936.1 b/lisa-ivl3-2b_nr3_122_vlorati_sr/runs/Oct01_10-45-08_bask-pg0309u17a/events.out.tfevents.1759311982.bask-pg0309u17a.1541936.1 new file mode 100644 index 0000000000000000000000000000000000000000..de89160d21c17275dfbbf17b134239f4e84c4b63 --- /dev/null +++ b/lisa-ivl3-2b_nr3_122_vlorati_sr/runs/Oct01_10-45-08_bask-pg0309u17a/events.out.tfevents.1759311982.bask-pg0309u17a.1541936.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a88600170707ce1d09c4f07d2cd3e4be6bf64612204255f465598e5e33cb8df +size 19643 diff --git a/lisa-ivl3-2b_nr3_122_vlorati_sr/runs/Oct01_12-07-29_bask-pg0308u29a/events.out.tfevents.1759316930.bask-pg0308u29a.2749490.1 b/lisa-ivl3-2b_nr3_122_vlorati_sr/runs/Oct01_12-07-29_bask-pg0308u29a/events.out.tfevents.1759316930.bask-pg0308u29a.2749490.1 new file mode 100644 index 0000000000000000000000000000000000000000..380b85d32f6a2040b8ada886c225838aca0a0d85 --- /dev/null +++ b/lisa-ivl3-2b_nr3_122_vlorati_sr/runs/Oct01_12-07-29_bask-pg0308u29a/events.out.tfevents.1759316930.bask-pg0308u29a.2749490.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5a125371ee4a9b0427c60ee81f8c3a7006a4a704ea55043b42c41cd2928a126 +size 116407 diff --git a/lisa-ivl3-2b_nr3_122_vlorati_sr/runs/Oct01_12-07-29_bask-pg0308u29a/events.out.tfevents.1759367632.bask-pg0308u29a.2749490.2 b/lisa-ivl3-2b_nr3_122_vlorati_sr/runs/Oct01_12-07-29_bask-pg0308u29a/events.out.tfevents.1759367632.bask-pg0308u29a.2749490.2 new file mode 100644 index 0000000000000000000000000000000000000000..504925b1a0e968be70de98d528496e5c9c4c55d9 --- /dev/null +++ b/lisa-ivl3-2b_nr3_122_vlorati_sr/runs/Oct01_12-07-29_bask-pg0308u29a/events.out.tfevents.1759367632.bask-pg0308u29a.2749490.2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0a612d03f07e3b509b543e8204aabfef372561f178dd155fc8d7d5713284262 +size 1402 diff --git a/lisa-ivl3-2b_nr3_122avg_2_vlorati_sr/evaluation_metrics.json b/lisa-ivl3-2b_nr3_122avg_2_vlorati_sr/evaluation_metrics.json new file mode 100644 index 0000000000000000000000000000000000000000..1988fef8bc96157737a98cf9f08e081d4c6380fc --- /dev/null +++ b/lisa-ivl3-2b_nr3_122avg_2_vlorati_sr/evaluation_metrics.json @@ -0,0 +1,56 @@ +[ + { + "val_dataset": "ReasonSeg|val", + "epoch": 1.0, + "eval_giou": 0.5512410998344421, + "eval_ciou": 0.6049960851669312 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 2.0, + "eval_giou": 0.5539265871047974, + "eval_ciou": 0.6427664160728455 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 3.0, + "eval_giou": 0.5823343396186829, + "eval_ciou": 0.6322457790374756 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 4.0, + "eval_giou": 0.5806931853294373, + "eval_ciou": 0.6692107915878296 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 5.0, + "eval_giou": 0.5740975737571716, + "eval_ciou": 0.6411476135253906 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 6.0, + "eval_giou": 0.5921218991279602, + "eval_ciou": 0.606126606464386 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 7.0, + "eval_giou": 0.5686666965484619, + "eval_ciou": 0.626236081123352 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 8.0, + "eval_giou": 0.5891085267066956, + "eval_ciou": 0.6414065957069397 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 9.0, + "eval_giou": 0.593542754650116, + "eval_ciou": 0.6374639272689819 + } +] \ No newline at end of file diff --git a/lisa-ivl3-2b_nr3_122avg_2_vlorati_sr/events.out.tfevents.1759396852.bask-pg0308u25a.368261.0 b/lisa-ivl3-2b_nr3_122avg_2_vlorati_sr/events.out.tfevents.1759396852.bask-pg0308u25a.368261.0 new file mode 100644 index 0000000000000000000000000000000000000000..ad61916665a09a0611780e71c57d3a39bda3654f --- /dev/null +++ b/lisa-ivl3-2b_nr3_122avg_2_vlorati_sr/events.out.tfevents.1759396852.bask-pg0308u25a.368261.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db2dac3d1051c38ebbabb794786647bccfa86598b13fe16ea730116a4e78ea07 +size 206167 diff --git a/lisa-ivl3-2b_nr3_122avg_2_vlorati_sr/runs/Oct02_10-20-48_bask-pg0308u25a/events.out.tfevents.1759396933.bask-pg0308u25a.368261.1 b/lisa-ivl3-2b_nr3_122avg_2_vlorati_sr/runs/Oct02_10-20-48_bask-pg0308u25a/events.out.tfevents.1759396933.bask-pg0308u25a.368261.1 new file mode 100644 index 0000000000000000000000000000000000000000..4a4cf4f9570a546a7de76a418d76f11c223350e7 --- /dev/null +++ b/lisa-ivl3-2b_nr3_122avg_2_vlorati_sr/runs/Oct02_10-20-48_bask-pg0308u25a/events.out.tfevents.1759396933.bask-pg0308u25a.368261.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c68a3856348d3b019a927fc31eeea2bc59790b8cdb6e90d1edc8816f9608cd6 +size 115492 diff --git a/lisa-ivl3-2b_nr3_122mlp2_2_5msea_vlorati_sr/ckpt_model/config.json b/lisa-ivl3-2b_nr3_122mlp2_2_5msea_vlorati_sr/ckpt_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1aaa0efe9e975d906488d234e8f6ec309c18056c --- /dev/null +++ b/lisa-ivl3-2b_nr3_122mlp2_2_5msea_vlorati_sr/ckpt_model/config.json @@ -0,0 +1,143 @@ +{ + "architectures": [ + "InternVL3Self" + ], + "auto_map": { + "AutoConfig": "configuration_internvl_chat.InternVLChatConfig", + "AutoModel": "modeling_internvl_chat.InternVLChatModel", + "AutoModelForCausalLM": "modeling_internvl_chat.InternVLChatModel" + }, + "downsample_ratio": 0.5, + "dtype": "bfloat16", + "dynamic_image_size": true, + "eos_token_id": 151645, + "force_image_size": 448, + "hidden_size": 1536, + "image_fold": null, + "llm_config": { + "_attn_implementation_autoset": true, + "_name_or_path": "./pretrained/Qwen2.5-32B-Instruct", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 151643, + "dtype": "bfloat16", + "eos_token_id": 151643, + "hidden_act": "silu", + "hidden_size": 1536, + "initializer_range": 0.02, + "intermediate_size": 8960, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 70, + "model_type": "qwen2", + "moe_config": null, + "num_attention_heads": 12, + "num_hidden_layers": 28, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 2.0, + "rope_type": "dynamic", + "type": "dynamic" + }, + "rope_theta": 1000000.0, + "sliding_window": null, + "use_bfloat16": true, + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 151676 + }, + "max_dynamic_patch": 12, + "min_dynamic_patch": 1, + "model_type": "internvl_chat", + "output_attentions": false, + "pad2square": false, + "pad_token_id": 151643, + "ps_version": "v2", + "select_layer": -1, + "system_message": null, + "template": "internvl2_5", + "tie_word_embeddings": false, + "transformers_version": null, + "use_backbone_lora": 0, + "use_llm_lora": 0, + "use_thumbnail": true, + "vision_config": { + "_attn_implementation_autoset": true, + "_name_or_path": "OpenGVLab/InternViT-6B-448px-V1-5", + "architectures": [ + "InternVisionModel" + ], + "attention_dropout": 0.0, + "auto_map": { + "AutoConfig": "configuration_intern_vit.InternVisionConfig", + "AutoModel": "modeling_intern_vit.InternVisionModel" + }, + "capacity_factor": 1.2, + "drop_path_rate": 0.1, + "dropout": 0.0, + "dtype": "bfloat16", + "eval_capacity_factor": 1.4, + "hidden_act": "gelu", + "hidden_size": 1024, + "image_size": 448, + "initializer_factor": 0.1, + "initializer_range": 1e-10, + "intermediate_size": 4096, + "laux_allreduce": "all_nodes", + "layer_norm_eps": 1e-06, + "model_type": "intern_vit_6b", + "moe_coeff_ratio": 0.5, + "moe_intermediate_size": 768, + "moe_output_scale": 4.0, + "noisy_gate_policy": "RSample_before", + "norm_type": "layer_norm", + "num_attention_heads": 16, + "num_channels": 3, + "num_experts": 8, + "num_hidden_layers": 24, + "num_routed_experts": 4, + "num_shared_experts": 4, + "patch_size": 14, + "qk_normalization": false, + "qkv_bias": true, + "shared_expert_intermediate_size": 3072, + "use_bfloat16": true, + "use_flash_attn": true, + "use_moe": false, + "use_residual": true, + "use_rts": false, + "use_weighted_residual": false + } +} diff --git a/lisa-ivl3-2b_nr3_122mlp2_2_5msea_vlorati_sr/ckpt_model/model.safetensors b/lisa-ivl3-2b_nr3_122mlp2_2_5msea_vlorati_sr/ckpt_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f03f93b2e7402a5068f5a96b67696084276ed689 --- /dev/null +++ b/lisa-ivl3-2b_nr3_122mlp2_2_5msea_vlorati_sr/ckpt_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42552b8773d9f0b5a52e6a4902a1b171fbc345be4acfeb5409edb293ecfa4978 +size 4244119544 diff --git a/lisa-ivl3-2b_nr3_122mlp2_2_5msea_vlorati_sr/ckpt_model/training_args.bin b/lisa-ivl3-2b_nr3_122mlp2_2_5msea_vlorati_sr/ckpt_model/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..0f8d7c83397dd4623ef97fc9c5dd76dde5d4bbb1 --- /dev/null +++ b/lisa-ivl3-2b_nr3_122mlp2_2_5msea_vlorati_sr/ckpt_model/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac22d11c81a60e57eac65d0c3b0551d8210025f2c5c0d691ad64deed480855e4 +size 7416 diff --git a/lisa-ivl3-2b_nr3_122mlp2_2_5msea_vlorati_sr/evaluation_metrics.json b/lisa-ivl3-2b_nr3_122mlp2_2_5msea_vlorati_sr/evaluation_metrics.json new file mode 100644 index 0000000000000000000000000000000000000000..74e64630a2643e96999eec0da778a9491ba51aef --- /dev/null +++ b/lisa-ivl3-2b_nr3_122mlp2_2_5msea_vlorati_sr/evaluation_metrics.json @@ -0,0 +1,116 @@ +[ + { + "val_dataset": "ReasonSeg|val", + "epoch": 1.0, + "eval_giou": 0.5392094850540161, + "eval_ciou": 0.6282947063446045 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 2.0, + "eval_giou": 0.5576633214950562, + "eval_ciou": 0.6598897576332092 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 3.0, + "eval_giou": 0.5833312273025513, + "eval_ciou": 0.6498080492019653 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 4.0, + "eval_giou": 0.5798692107200623, + "eval_ciou": 0.6873687505722046 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 5.0, + "eval_giou": 0.592064380645752, + "eval_ciou": 0.6775320768356323 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 6.0, + "eval_giou": 0.5976513624191284, + "eval_ciou": 0.6707125306129456 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 7.0, + "eval_giou": 0.6185094714164734, + "eval_ciou": 0.6921017169952393 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 8.0, + "eval_giou": 0.6111495494842529, + "eval_ciou": 0.6619952917098999 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 9.0, + "eval_giou": 0.6203259825706482, + "eval_ciou": 0.6839476823806763 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 10.0, + "eval_giou": 0.622075617313385, + "eval_ciou": 0.6720116138458252 + }, + { + "val_dataset": "ReasonSeg|test", + "epoch": 10.0, + "eval_giou": 0.5997777581214905, + "eval_ciou": 0.6268056631088257 + }, + { + "val_dataset": "refcoco|unc|val", + "epoch": 10.0, + "eval_giou": 0.7880642414093018, + "eval_ciou": 0.7902976274490356 + }, + { + "val_dataset": "refcoco|unc|testA", + "epoch": 10.0, + "eval_giou": 0.8099915385246277, + "eval_ciou": 0.814264178276062 + }, + { + "val_dataset": "refcoco|unc|testB", + "epoch": 10.0, + "eval_giou": 0.7616512179374695, + "eval_ciou": 0.7614732980728149 + }, + { + "val_dataset": "refcoco+|unc|val", + "epoch": 10.0, + "eval_giou": 0.7375759482383728, + "eval_ciou": 0.7304275035858154 + }, + { + "val_dataset": "refcoco+|unc|testA", + "epoch": 10.0, + "eval_giou": 0.7823287844657898, + "eval_ciou": 0.7828382849693298 + }, + { + "val_dataset": "refcoco+|unc|testB", + "epoch": 10.0, + "eval_giou": 0.6862552762031555, + "eval_ciou": 0.673291027545929 + }, + { + "val_dataset": "refcocog|umd|test", + "epoch": 10.0, + "eval_giou": 0.7496898770332336, + "eval_ciou": 0.756252110004425 + }, + { + "val_dataset": "refcocog|umd|val", + "epoch": 10.0, + "eval_giou": 0.7468956112861633, + "eval_ciou": 0.7501649856567383 + } +] \ No newline at end of file diff --git a/lisa-ivl3-2b_nr3_122mlp2_2_5msea_vlorati_sr/events.out.tfevents.1759487100.bask-pg0309u05a.776367.0 b/lisa-ivl3-2b_nr3_122mlp2_2_5msea_vlorati_sr/events.out.tfevents.1759487100.bask-pg0309u05a.776367.0 new file mode 100644 index 0000000000000000000000000000000000000000..c1069c6ded313e557b4616008740a0decce3a95f --- /dev/null +++ b/lisa-ivl3-2b_nr3_122mlp2_2_5msea_vlorati_sr/events.out.tfevents.1759487100.bask-pg0309u05a.776367.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67ff77b63ed688e2cd9eb15cd07166a9f7a58a1ed362d6909fb3a7a22d9285df +size 88 diff --git a/lisa-ivl3-2b_nr3_122mlp2_2_5msea_vlorati_sr/events.out.tfevents.1759487208.bask-pg0309u05a.779452.0 b/lisa-ivl3-2b_nr3_122mlp2_2_5msea_vlorati_sr/events.out.tfevents.1759487208.bask-pg0309u05a.779452.0 new file mode 100644 index 0000000000000000000000000000000000000000..21aa2354bd506071d281e82b6aad03f9c3b51e4b --- /dev/null +++ b/lisa-ivl3-2b_nr3_122mlp2_2_5msea_vlorati_sr/events.out.tfevents.1759487208.bask-pg0309u05a.779452.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d72f638ab99d6890c7bf7c18b6c4b27f701454e78a0e79aed47cf3292dab3daa +size 212352 diff --git a/lisa-ivl3-2b_nr3_122mlp2_2_5msea_vlorati_sr/runs/Oct03_11-26-45_bask-pg0309u05a/events.out.tfevents.1759487280.bask-pg0309u05a.779452.1 b/lisa-ivl3-2b_nr3_122mlp2_2_5msea_vlorati_sr/runs/Oct03_11-26-45_bask-pg0309u05a/events.out.tfevents.1759487280.bask-pg0309u05a.779452.1 new file mode 100644 index 0000000000000000000000000000000000000000..618d83410e97669b2dd3d689c891d8ca122391e2 --- /dev/null +++ b/lisa-ivl3-2b_nr3_122mlp2_2_5msea_vlorati_sr/runs/Oct03_11-26-45_bask-pg0309u05a/events.out.tfevents.1759487280.bask-pg0309u05a.779452.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ea003869de34d8e9f399342a4781cfd7e4218c71bf0790fab731e4b8b3623fb +size 116428 diff --git a/lisa-ivl3-2b_nr3_122mlp2_2_5msea_vlorati_sr/runs/Oct03_11-26-45_bask-pg0309u05a/events.out.tfevents.1759514329.bask-pg0309u05a.779452.2 b/lisa-ivl3-2b_nr3_122mlp2_2_5msea_vlorati_sr/runs/Oct03_11-26-45_bask-pg0309u05a/events.out.tfevents.1759514329.bask-pg0309u05a.779452.2 new file mode 100644 index 0000000000000000000000000000000000000000..4c9f76f7f2adc2980231929844d63f20919173fb --- /dev/null +++ b/lisa-ivl3-2b_nr3_122mlp2_2_5msea_vlorati_sr/runs/Oct03_11-26-45_bask-pg0309u05a/events.out.tfevents.1759514329.bask-pg0309u05a.779452.2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c580d0ee59559e1e3e1fe37f3f77745de27ec905e3219007f21683c3e4da7fcd +size 1402 diff --git a/lisa-ivl3-2b_nr3_122mlp2_2_mse_vlorati_sr_1/ckpt_model/config.json b/lisa-ivl3-2b_nr3_122mlp2_2_mse_vlorati_sr_1/ckpt_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1aaa0efe9e975d906488d234e8f6ec309c18056c --- /dev/null +++ b/lisa-ivl3-2b_nr3_122mlp2_2_mse_vlorati_sr_1/ckpt_model/config.json @@ -0,0 +1,143 @@ +{ + "architectures": [ + "InternVL3Self" + ], + "auto_map": { + "AutoConfig": "configuration_internvl_chat.InternVLChatConfig", + "AutoModel": "modeling_internvl_chat.InternVLChatModel", + "AutoModelForCausalLM": "modeling_internvl_chat.InternVLChatModel" + }, + "downsample_ratio": 0.5, + "dtype": "bfloat16", + "dynamic_image_size": true, + "eos_token_id": 151645, + "force_image_size": 448, + "hidden_size": 1536, + "image_fold": null, + "llm_config": { + "_attn_implementation_autoset": true, + "_name_or_path": "./pretrained/Qwen2.5-32B-Instruct", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 151643, + "dtype": "bfloat16", + "eos_token_id": 151643, + "hidden_act": "silu", + "hidden_size": 1536, + "initializer_range": 0.02, + "intermediate_size": 8960, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 70, + "model_type": "qwen2", + "moe_config": null, + "num_attention_heads": 12, + "num_hidden_layers": 28, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 2.0, + "rope_type": "dynamic", + "type": "dynamic" + }, + "rope_theta": 1000000.0, + "sliding_window": null, + "use_bfloat16": true, + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 151676 + }, + "max_dynamic_patch": 12, + "min_dynamic_patch": 1, + "model_type": "internvl_chat", + "output_attentions": false, + "pad2square": false, + "pad_token_id": 151643, + "ps_version": "v2", + "select_layer": -1, + "system_message": null, + "template": "internvl2_5", + "tie_word_embeddings": false, + "transformers_version": null, + "use_backbone_lora": 0, + "use_llm_lora": 0, + "use_thumbnail": true, + "vision_config": { + "_attn_implementation_autoset": true, + "_name_or_path": "OpenGVLab/InternViT-6B-448px-V1-5", + "architectures": [ + "InternVisionModel" + ], + "attention_dropout": 0.0, + "auto_map": { + "AutoConfig": "configuration_intern_vit.InternVisionConfig", + "AutoModel": "modeling_intern_vit.InternVisionModel" + }, + "capacity_factor": 1.2, + "drop_path_rate": 0.1, + "dropout": 0.0, + "dtype": "bfloat16", + "eval_capacity_factor": 1.4, + "hidden_act": "gelu", + "hidden_size": 1024, + "image_size": 448, + "initializer_factor": 0.1, + "initializer_range": 1e-10, + "intermediate_size": 4096, + "laux_allreduce": "all_nodes", + "layer_norm_eps": 1e-06, + "model_type": "intern_vit_6b", + "moe_coeff_ratio": 0.5, + "moe_intermediate_size": 768, + "moe_output_scale": 4.0, + "noisy_gate_policy": "RSample_before", + "norm_type": "layer_norm", + "num_attention_heads": 16, + "num_channels": 3, + "num_experts": 8, + "num_hidden_layers": 24, + "num_routed_experts": 4, + "num_shared_experts": 4, + "patch_size": 14, + "qk_normalization": false, + "qkv_bias": true, + "shared_expert_intermediate_size": 3072, + "use_bfloat16": true, + "use_flash_attn": true, + "use_moe": false, + "use_residual": true, + "use_rts": false, + "use_weighted_residual": false + } +} diff --git a/lisa-ivl3-2b_nr3_122mlp2_2_mse_vlorati_sr_1/ckpt_model/model.safetensors b/lisa-ivl3-2b_nr3_122mlp2_2_mse_vlorati_sr_1/ckpt_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..34acbeb16c56d8c696c0b927662f137deafea886 --- /dev/null +++ b/lisa-ivl3-2b_nr3_122mlp2_2_mse_vlorati_sr_1/ckpt_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0804df1641ae009bf56a6a49bfcf4a200d4e3165527c70601f5b38e5df77c83b +size 4244119544 diff --git a/lisa-ivl3-2b_nr3_122mlp2_2_mse_vlorati_sr_1/ckpt_model/training_args.bin b/lisa-ivl3-2b_nr3_122mlp2_2_mse_vlorati_sr_1/ckpt_model/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..201cd0abfc27b3cde13da1c17f306ec0e9761791 --- /dev/null +++ b/lisa-ivl3-2b_nr3_122mlp2_2_mse_vlorati_sr_1/ckpt_model/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2bb82c502e919e6eeda6cfa1001673943632f205babd773b21c1de2b165e679f +size 7416 diff --git a/lisa-ivl3-2b_nr3_122mlp2_2_mse_vlorati_sr_1/evaluation_metrics.json b/lisa-ivl3-2b_nr3_122mlp2_2_mse_vlorati_sr_1/evaluation_metrics.json new file mode 100644 index 0000000000000000000000000000000000000000..c040c49fa6755cd6197c88626701b19f0bc1c6c8 --- /dev/null +++ b/lisa-ivl3-2b_nr3_122mlp2_2_mse_vlorati_sr_1/evaluation_metrics.json @@ -0,0 +1,116 @@ +[ + { + "val_dataset": "ReasonSeg|val", + "epoch": 1.0, + "eval_giou": 0.5562365055084229, + "eval_ciou": 0.6643704771995544 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 2.0, + "eval_giou": 0.5716431736946106, + "eval_ciou": 0.6611185073852539 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 3.0, + "eval_giou": 0.5766915082931519, + "eval_ciou": 0.6548216938972473 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 4.0, + "eval_giou": 0.5725152492523193, + "eval_ciou": 0.5463446974754333 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 5.0, + "eval_giou": 0.5759479999542236, + "eval_ciou": 0.6820641160011292 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 6.0, + "eval_giou": 0.5743090510368347, + "eval_ciou": 0.6215196847915649 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 7.0, + "eval_giou": 0.584280252456665, + "eval_ciou": 0.614290714263916 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 8.0, + "eval_giou": 0.58842533826828, + "eval_ciou": 0.6595209240913391 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 9.0, + "eval_giou": 0.597851037979126, + "eval_ciou": 0.6563701629638672 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 10.0, + "eval_giou": 0.6018281579017639, + "eval_ciou": 0.6752097606658936 + }, + { + "val_dataset": "ReasonSeg|test", + "epoch": 10.0, + "eval_giou": 0.5995708703994751, + "eval_ciou": 0.6376393437385559 + }, + { + "val_dataset": "refcoco|unc|val", + "epoch": 10.0, + "eval_giou": 0.7907702922821045, + "eval_ciou": 0.7953957915306091 + }, + { + "val_dataset": "refcoco|unc|testA", + "epoch": 10.0, + "eval_giou": 0.8092643022537231, + "eval_ciou": 0.8132203221321106 + }, + { + "val_dataset": "refcoco|unc|testB", + "epoch": 10.0, + "eval_giou": 0.7610157132148743, + "eval_ciou": 0.7595085501670837 + }, + { + "val_dataset": "refcoco+|unc|val", + "epoch": 10.0, + "eval_giou": 0.7379903197288513, + "eval_ciou": 0.7295799851417542 + }, + { + "val_dataset": "refcoco+|unc|testA", + "epoch": 10.0, + "eval_giou": 0.7817531824111938, + "eval_ciou": 0.7821621298789978 + }, + { + "val_dataset": "refcoco+|unc|testB", + "epoch": 10.0, + "eval_giou": 0.6909171938896179, + "eval_ciou": 0.6794775724411011 + }, + { + "val_dataset": "refcocog|umd|test", + "epoch": 10.0, + "eval_giou": 0.750555694103241, + "eval_ciou": 0.7573494911193848 + }, + { + "val_dataset": "refcocog|umd|val", + "epoch": 10.0, + "eval_giou": 0.7468773126602173, + "eval_ciou": 0.7512911558151245 + } +] \ No newline at end of file diff --git a/lisa-ivl3-2b_nr3_122mlp2_2_mse_vlorati_sr_1/events.out.tfevents.1759430578.bask-pg0308u12a.1485695.0 b/lisa-ivl3-2b_nr3_122mlp2_2_mse_vlorati_sr_1/events.out.tfevents.1759430578.bask-pg0308u12a.1485695.0 new file mode 100644 index 0000000000000000000000000000000000000000..829b8f9e45094dc3fb1d0520d1cdbbe442c866f8 --- /dev/null +++ b/lisa-ivl3-2b_nr3_122mlp2_2_mse_vlorati_sr_1/events.out.tfevents.1759430578.bask-pg0308u12a.1485695.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:770ff008654cb5e104c105c23e25db6809c796d1fb2bd40f6f170c1c6b38fa2f +size 88 diff --git a/lisa-ivl3-2b_nr3_122mlp2_2_mse_vlorati_sr_1/events.out.tfevents.1759430749.bask-pg0308u12a.1490044.0 b/lisa-ivl3-2b_nr3_122mlp2_2_mse_vlorati_sr_1/events.out.tfevents.1759430749.bask-pg0308u12a.1490044.0 new file mode 100644 index 0000000000000000000000000000000000000000..09fae1bbc46b370016798899ae62b962e92271ef --- /dev/null +++ b/lisa-ivl3-2b_nr3_122mlp2_2_mse_vlorati_sr_1/events.out.tfevents.1759430749.bask-pg0308u12a.1490044.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:978774fcab191be212abf4fced29cfffd06bccbd13406daf68270747a21f11a7 +size 486 diff --git a/lisa-ivl3-2b_nr3_122mlp2_2_mse_vlorati_sr_1/events.out.tfevents.1759431015.bask-pg0308u12a.1495296.0 b/lisa-ivl3-2b_nr3_122mlp2_2_mse_vlorati_sr_1/events.out.tfevents.1759431015.bask-pg0308u12a.1495296.0 new file mode 100644 index 0000000000000000000000000000000000000000..9fb475a3617d14b556ed40bd9f293ea35eb7743c --- /dev/null +++ b/lisa-ivl3-2b_nr3_122mlp2_2_mse_vlorati_sr_1/events.out.tfevents.1759431015.bask-pg0308u12a.1495296.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:415d4ecfb41b12b1e6ba0a599a2ea984b375475e53cd6d5b66de776a0675767f +size 212352 diff --git a/lisa-ivl3-2b_nr3_122mlp2_2_mse_vlorati_sr_1/runs/Oct02_19-42-55_bask-pg0308u12a/events.out.tfevents.1759430651.bask-pg0308u12a.1485695.1 b/lisa-ivl3-2b_nr3_122mlp2_2_mse_vlorati_sr_1/runs/Oct02_19-42-55_bask-pg0308u12a/events.out.tfevents.1759430651.bask-pg0308u12a.1485695.1 new file mode 100644 index 0000000000000000000000000000000000000000..24f220c34638a976179657b4eba67620ec003606 --- /dev/null +++ b/lisa-ivl3-2b_nr3_122mlp2_2_mse_vlorati_sr_1/runs/Oct02_19-42-55_bask-pg0308u12a/events.out.tfevents.1759430651.bask-pg0308u12a.1485695.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a0076115f29e49c0b68f3a48919853d9b16cbda44117600a26fb38b4b7aea49 +size 9165 diff --git a/lisa-ivl3-2b_nr3_122mlp2_2_mse_vlorati_sr_1/runs/Oct02_19-45-46_bask-pg0308u12a/events.out.tfevents.1759430811.bask-pg0308u12a.1490044.1 b/lisa-ivl3-2b_nr3_122mlp2_2_mse_vlorati_sr_1/runs/Oct02_19-45-46_bask-pg0308u12a/events.out.tfevents.1759430811.bask-pg0308u12a.1490044.1 new file mode 100644 index 0000000000000000000000000000000000000000..5e650f1d0128609d64146eb1b50a7fb32ac907e3 --- /dev/null +++ b/lisa-ivl3-2b_nr3_122mlp2_2_mse_vlorati_sr_1/runs/Oct02_19-45-46_bask-pg0308u12a/events.out.tfevents.1759430811.bask-pg0308u12a.1490044.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6b6fa47f498929e30f1949f225d33dacf7f368fad410d7423e8493f8b7afe6e +size 9372 diff --git a/lisa-ivl3-2b_nr3_122mlp2_2_mse_vlorati_sr_1/runs/Oct02_19-50-12_bask-pg0308u12a/events.out.tfevents.1759431076.bask-pg0308u12a.1495296.1 b/lisa-ivl3-2b_nr3_122mlp2_2_mse_vlorati_sr_1/runs/Oct02_19-50-12_bask-pg0308u12a/events.out.tfevents.1759431076.bask-pg0308u12a.1495296.1 new file mode 100644 index 0000000000000000000000000000000000000000..d306871a120f5897c51b6c88fc51fcc879df73cf --- /dev/null +++ b/lisa-ivl3-2b_nr3_122mlp2_2_mse_vlorati_sr_1/runs/Oct02_19-50-12_bask-pg0308u12a/events.out.tfevents.1759431076.bask-pg0308u12a.1495296.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf88d4ed2684e98570d6dd60a854cf7f4850e7f0f40d6959f8abbe1f92c8cfb3 +size 116431 diff --git a/lisa-ivl3-2b_nr3_122mlp2_2_mse_vlorati_sr_1/runs/Oct02_19-50-12_bask-pg0308u12a/events.out.tfevents.1759482041.bask-pg0308u12a.1495296.2 b/lisa-ivl3-2b_nr3_122mlp2_2_mse_vlorati_sr_1/runs/Oct02_19-50-12_bask-pg0308u12a/events.out.tfevents.1759482041.bask-pg0308u12a.1495296.2 new file mode 100644 index 0000000000000000000000000000000000000000..8704eb3a74ad7924fbcd60df98c080838a803e30 --- /dev/null +++ b/lisa-ivl3-2b_nr3_122mlp2_2_mse_vlorati_sr_1/runs/Oct02_19-50-12_bask-pg0308u12a/events.out.tfevents.1759482041.bask-pg0308u12a.1495296.2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79dc0980e4da5072fddea738c0d57c0d4721c3a0da5e87a20d1bcf1056f79240 +size 1402 diff --git a/lisa-ivl3-2b_nr3_122mlp2_2_msea_vlorati_sr/ckpt_model/config.json b/lisa-ivl3-2b_nr3_122mlp2_2_msea_vlorati_sr/ckpt_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1aaa0efe9e975d906488d234e8f6ec309c18056c --- /dev/null +++ b/lisa-ivl3-2b_nr3_122mlp2_2_msea_vlorati_sr/ckpt_model/config.json @@ -0,0 +1,143 @@ +{ + "architectures": [ + "InternVL3Self" + ], + "auto_map": { + "AutoConfig": "configuration_internvl_chat.InternVLChatConfig", + "AutoModel": "modeling_internvl_chat.InternVLChatModel", + "AutoModelForCausalLM": "modeling_internvl_chat.InternVLChatModel" + }, + "downsample_ratio": 0.5, + "dtype": "bfloat16", + "dynamic_image_size": true, + "eos_token_id": 151645, + "force_image_size": 448, + "hidden_size": 1536, + "image_fold": null, + "llm_config": { + "_attn_implementation_autoset": true, + "_name_or_path": "./pretrained/Qwen2.5-32B-Instruct", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 151643, + "dtype": "bfloat16", + "eos_token_id": 151643, + "hidden_act": "silu", + "hidden_size": 1536, + "initializer_range": 0.02, + "intermediate_size": 8960, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 70, + "model_type": "qwen2", + "moe_config": null, + "num_attention_heads": 12, + "num_hidden_layers": 28, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 2.0, + "rope_type": "dynamic", + "type": "dynamic" + }, + "rope_theta": 1000000.0, + "sliding_window": null, + "use_bfloat16": true, + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 151676 + }, + "max_dynamic_patch": 12, + "min_dynamic_patch": 1, + "model_type": "internvl_chat", + "output_attentions": false, + "pad2square": false, + "pad_token_id": 151643, + "ps_version": "v2", + "select_layer": -1, + "system_message": null, + "template": "internvl2_5", + "tie_word_embeddings": false, + "transformers_version": null, + "use_backbone_lora": 0, + "use_llm_lora": 0, + "use_thumbnail": true, + "vision_config": { + "_attn_implementation_autoset": true, + "_name_or_path": "OpenGVLab/InternViT-6B-448px-V1-5", + "architectures": [ + "InternVisionModel" + ], + "attention_dropout": 0.0, + "auto_map": { + "AutoConfig": "configuration_intern_vit.InternVisionConfig", + "AutoModel": "modeling_intern_vit.InternVisionModel" + }, + "capacity_factor": 1.2, + "drop_path_rate": 0.1, + "dropout": 0.0, + "dtype": "bfloat16", + "eval_capacity_factor": 1.4, + "hidden_act": "gelu", + "hidden_size": 1024, + "image_size": 448, + "initializer_factor": 0.1, + "initializer_range": 1e-10, + "intermediate_size": 4096, + "laux_allreduce": "all_nodes", + "layer_norm_eps": 1e-06, + "model_type": "intern_vit_6b", + "moe_coeff_ratio": 0.5, + "moe_intermediate_size": 768, + "moe_output_scale": 4.0, + "noisy_gate_policy": "RSample_before", + "norm_type": "layer_norm", + "num_attention_heads": 16, + "num_channels": 3, + "num_experts": 8, + "num_hidden_layers": 24, + "num_routed_experts": 4, + "num_shared_experts": 4, + "patch_size": 14, + "qk_normalization": false, + "qkv_bias": true, + "shared_expert_intermediate_size": 3072, + "use_bfloat16": true, + "use_flash_attn": true, + "use_moe": false, + "use_residual": true, + "use_rts": false, + "use_weighted_residual": false + } +} diff --git a/lisa-ivl3-2b_nr3_122mlp2_2_msea_vlorati_sr/ckpt_model/model.safetensors b/lisa-ivl3-2b_nr3_122mlp2_2_msea_vlorati_sr/ckpt_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..63b7c35f5cd63d9a0f3efe59f2abd7e120aab766 --- /dev/null +++ b/lisa-ivl3-2b_nr3_122mlp2_2_msea_vlorati_sr/ckpt_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93dada071125f71a50dd2e9c0016cab4b290e9e00111f52bf0799a42280ab8fb +size 4244119544 diff --git a/lisa-ivl3-2b_nr3_122mlp2_2_msea_vlorati_sr/ckpt_model/training_args.bin b/lisa-ivl3-2b_nr3_122mlp2_2_msea_vlorati_sr/ckpt_model/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..15dbe98b50223fe61fc6fc075514bf415137c51c --- /dev/null +++ b/lisa-ivl3-2b_nr3_122mlp2_2_msea_vlorati_sr/ckpt_model/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4edd52970916a6cdd2b74975d4de7dcc32149856d5daddb7e9a32cdf27bad502 +size 7416 diff --git a/lisa-ivl3-2b_nr3_122mlp2_2_msea_vlorati_sr/evaluation_metrics.json b/lisa-ivl3-2b_nr3_122mlp2_2_msea_vlorati_sr/evaluation_metrics.json new file mode 100644 index 0000000000000000000000000000000000000000..96145a41da109a6ec9845d9a66064d01a6eac9d2 --- /dev/null +++ b/lisa-ivl3-2b_nr3_122mlp2_2_msea_vlorati_sr/evaluation_metrics.json @@ -0,0 +1,116 @@ +[ + { + "val_dataset": "ReasonSeg|val", + "epoch": 1.0, + "eval_giou": 0.5467231273651123, + "eval_ciou": 0.5753340125083923 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 2.0, + "eval_giou": 0.5644440054893494, + "eval_ciou": 0.660574734210968 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 3.0, + "eval_giou": 0.6023533940315247, + "eval_ciou": 0.6768925786018372 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 4.0, + "eval_giou": 0.5986256003379822, + "eval_ciou": 0.669558048248291 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 5.0, + "eval_giou": 0.594001829624176, + "eval_ciou": 0.658316433429718 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 6.0, + "eval_giou": 0.6124429702758789, + "eval_ciou": 0.6718175411224365 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 7.0, + "eval_giou": 0.619059681892395, + "eval_ciou": 0.6924576759338379 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 8.0, + "eval_giou": 0.6124151945114136, + "eval_ciou": 0.6846053004264832 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 9.0, + "eval_giou": 0.6129991412162781, + "eval_ciou": 0.6813850998878479 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 10.0, + "eval_giou": 0.6190656423568726, + "eval_ciou": 0.6838319897651672 + }, + { + "val_dataset": "ReasonSeg|test", + "epoch": 10.0, + "eval_giou": 0.6097272038459778, + "eval_ciou": 0.6174986958503723 + }, + { + "val_dataset": "refcoco|unc|val", + "epoch": 10.0, + "eval_giou": 0.7879274487495422, + "eval_ciou": 0.7913415431976318 + }, + { + "val_dataset": "refcoco|unc|testA", + "epoch": 10.0, + "eval_giou": 0.8099437355995178, + "eval_ciou": 0.8135954737663269 + }, + { + "val_dataset": "refcoco|unc|testB", + "epoch": 10.0, + "eval_giou": 0.7623280882835388, + "eval_ciou": 0.7650956511497498 + }, + { + "val_dataset": "refcoco+|unc|val", + "epoch": 10.0, + "eval_giou": 0.7391623258590698, + "eval_ciou": 0.7315544486045837 + }, + { + "val_dataset": "refcoco+|unc|testA", + "epoch": 10.0, + "eval_giou": 0.7828981876373291, + "eval_ciou": 0.783746063709259 + }, + { + "val_dataset": "refcoco+|unc|testB", + "epoch": 10.0, + "eval_giou": 0.6889922618865967, + "eval_ciou": 0.6779899597167969 + }, + { + "val_dataset": "refcocog|umd|test", + "epoch": 10.0, + "eval_giou": 0.7484031319618225, + "eval_ciou": 0.755821943283081 + }, + { + "val_dataset": "refcocog|umd|val", + "epoch": 10.0, + "eval_giou": 0.7464094161987305, + "eval_ciou": 0.751054048538208 + } +] \ No newline at end of file diff --git a/lisa-ivl3-2b_nr3_122mlp2_2_msea_vlorati_sr/events.out.tfevents.1759486084.bask-pg0308u03a.1056476.0 b/lisa-ivl3-2b_nr3_122mlp2_2_msea_vlorati_sr/events.out.tfevents.1759486084.bask-pg0308u03a.1056476.0 new file mode 100644 index 0000000000000000000000000000000000000000..1482c7b7e5c8989c2f57c9c3c9d5f3cd42bde201 --- /dev/null +++ b/lisa-ivl3-2b_nr3_122mlp2_2_msea_vlorati_sr/events.out.tfevents.1759486084.bask-pg0308u03a.1056476.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5a907162b06a60297fd59cf953ac9baca8d7b6b4021590de16f6a430bdace5b +size 88 diff --git a/lisa-ivl3-2b_nr3_122mlp2_2_msea_vlorati_sr/events.out.tfevents.1759486301.bask-pg0308u03a.1062720.0 b/lisa-ivl3-2b_nr3_122mlp2_2_msea_vlorati_sr/events.out.tfevents.1759486301.bask-pg0308u03a.1062720.0 new file mode 100644 index 0000000000000000000000000000000000000000..31cb42730fa55b4b631909a740f622006ae3e62e --- /dev/null +++ b/lisa-ivl3-2b_nr3_122mlp2_2_msea_vlorati_sr/events.out.tfevents.1759486301.bask-pg0308u03a.1062720.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be1ee95898bbed9f8db1b3627236423c6555c6a2ca08afdf7f4a484d3e4d7cbd +size 88 diff --git a/lisa-ivl3-2b_nr3_122mlp2_2_msea_vlorati_sr/events.out.tfevents.1759486531.bask-pg0308u03a.1067728.0 b/lisa-ivl3-2b_nr3_122mlp2_2_msea_vlorati_sr/events.out.tfevents.1759486531.bask-pg0308u03a.1067728.0 new file mode 100644 index 0000000000000000000000000000000000000000..97438661cf6b9e1ae1a2bdf3edfdc047a17f65fe --- /dev/null +++ b/lisa-ivl3-2b_nr3_122mlp2_2_msea_vlorati_sr/events.out.tfevents.1759486531.bask-pg0308u03a.1067728.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa8f77d04933bfd5967977da4714453ae405b14cacec11c736ffbd9138bd3ae6 +size 212352 diff --git a/lisa-ivl3-2b_nr3_122mlp2_2_msea_vlorati_sr/runs/Oct03_11-08-01_bask-pg0308u03a/events.out.tfevents.1759486167.bask-pg0308u03a.1056476.1 b/lisa-ivl3-2b_nr3_122mlp2_2_msea_vlorati_sr/runs/Oct03_11-08-01_bask-pg0308u03a/events.out.tfevents.1759486167.bask-pg0308u03a.1056476.1 new file mode 100644 index 0000000000000000000000000000000000000000..8b315594457a564d981f913c9096e0d0ad92cf37 --- /dev/null +++ b/lisa-ivl3-2b_nr3_122mlp2_2_msea_vlorati_sr/runs/Oct03_11-08-01_bask-pg0308u03a/events.out.tfevents.1759486167.bask-pg0308u03a.1056476.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c801886a960de4edfd3e29f9694fcc75e90611798b28df688fb3dc1495925e03 +size 9162 diff --git a/lisa-ivl3-2b_nr3_122mlp2_2_msea_vlorati_sr/runs/Oct03_11-11-39_bask-pg0308u03a/events.out.tfevents.1759486371.bask-pg0308u03a.1062720.1 b/lisa-ivl3-2b_nr3_122mlp2_2_msea_vlorati_sr/runs/Oct03_11-11-39_bask-pg0308u03a/events.out.tfevents.1759486371.bask-pg0308u03a.1062720.1 new file mode 100644 index 0000000000000000000000000000000000000000..80b4853be2d600f56971d167975d11e8fd491621 --- /dev/null +++ b/lisa-ivl3-2b_nr3_122mlp2_2_msea_vlorati_sr/runs/Oct03_11-11-39_bask-pg0308u03a/events.out.tfevents.1759486371.bask-pg0308u03a.1062720.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d69159c0dfb511ac8ea00e8f0a1ca9001b9cd5630918ad009b732b5e139c5fb1 +size 9162 diff --git a/lisa-ivl3-2b_nr3_122mlp2_2_msea_vlorati_sr/runs/Oct03_11-15-28_bask-pg0308u03a/events.out.tfevents.1759486601.bask-pg0308u03a.1067728.1 b/lisa-ivl3-2b_nr3_122mlp2_2_msea_vlorati_sr/runs/Oct03_11-15-28_bask-pg0308u03a/events.out.tfevents.1759486601.bask-pg0308u03a.1067728.1 new file mode 100644 index 0000000000000000000000000000000000000000..b87c9d1f4bba33b4689797d9ca91ade2dd8add22 --- /dev/null +++ b/lisa-ivl3-2b_nr3_122mlp2_2_msea_vlorati_sr/runs/Oct03_11-15-28_bask-pg0308u03a/events.out.tfevents.1759486601.bask-pg0308u03a.1067728.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5aefcd39b1350917d2db860776b35d320eb289ee03cfb5100e810336e60684cf +size 116426 diff --git a/lisa-ivl3-2b_nr3_122mlp2_2_msea_vlorati_sr/runs/Oct03_11-15-28_bask-pg0308u03a/events.out.tfevents.1759515005.bask-pg0308u03a.1067728.2 b/lisa-ivl3-2b_nr3_122mlp2_2_msea_vlorati_sr/runs/Oct03_11-15-28_bask-pg0308u03a/events.out.tfevents.1759515005.bask-pg0308u03a.1067728.2 new file mode 100644 index 0000000000000000000000000000000000000000..73f8b112beb1366999dcdc544c03ab853c23916a --- /dev/null +++ b/lisa-ivl3-2b_nr3_122mlp2_2_msea_vlorati_sr/runs/Oct03_11-15-28_bask-pg0308u03a/events.out.tfevents.1759515005.bask-pg0308u03a.1067728.2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b38a0ab3a2ddb8788ac712d89477844c9e1cbd0861de93b40f92f1539790c651 +size 1402 diff --git a/lisa-ivl3-2b_nr3_122mlp2_2_mseadl_vlorati_sr/ckpt_model/config.json b/lisa-ivl3-2b_nr3_122mlp2_2_mseadl_vlorati_sr/ckpt_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1aaa0efe9e975d906488d234e8f6ec309c18056c --- /dev/null +++ b/lisa-ivl3-2b_nr3_122mlp2_2_mseadl_vlorati_sr/ckpt_model/config.json @@ -0,0 +1,143 @@ +{ + "architectures": [ + "InternVL3Self" + ], + "auto_map": { + "AutoConfig": "configuration_internvl_chat.InternVLChatConfig", + "AutoModel": "modeling_internvl_chat.InternVLChatModel", + "AutoModelForCausalLM": "modeling_internvl_chat.InternVLChatModel" + }, + "downsample_ratio": 0.5, + "dtype": "bfloat16", + "dynamic_image_size": true, + "eos_token_id": 151645, + "force_image_size": 448, + "hidden_size": 1536, + "image_fold": null, + "llm_config": { + "_attn_implementation_autoset": true, + "_name_or_path": "./pretrained/Qwen2.5-32B-Instruct", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 151643, + "dtype": "bfloat16", + "eos_token_id": 151643, + "hidden_act": "silu", + "hidden_size": 1536, + "initializer_range": 0.02, + "intermediate_size": 8960, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 70, + "model_type": "qwen2", + "moe_config": null, + "num_attention_heads": 12, + "num_hidden_layers": 28, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 2.0, + "rope_type": "dynamic", + "type": "dynamic" + }, + "rope_theta": 1000000.0, + "sliding_window": null, + "use_bfloat16": true, + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 151676 + }, + "max_dynamic_patch": 12, + "min_dynamic_patch": 1, + "model_type": "internvl_chat", + "output_attentions": false, + "pad2square": false, + "pad_token_id": 151643, + "ps_version": "v2", + "select_layer": -1, + "system_message": null, + "template": "internvl2_5", + "tie_word_embeddings": false, + "transformers_version": null, + "use_backbone_lora": 0, + "use_llm_lora": 0, + "use_thumbnail": true, + "vision_config": { + "_attn_implementation_autoset": true, + "_name_or_path": "OpenGVLab/InternViT-6B-448px-V1-5", + "architectures": [ + "InternVisionModel" + ], + "attention_dropout": 0.0, + "auto_map": { + "AutoConfig": "configuration_intern_vit.InternVisionConfig", + "AutoModel": "modeling_intern_vit.InternVisionModel" + }, + "capacity_factor": 1.2, + "drop_path_rate": 0.1, + "dropout": 0.0, + "dtype": "bfloat16", + "eval_capacity_factor": 1.4, + "hidden_act": "gelu", + "hidden_size": 1024, + "image_size": 448, + "initializer_factor": 0.1, + "initializer_range": 1e-10, + "intermediate_size": 4096, + "laux_allreduce": "all_nodes", + "layer_norm_eps": 1e-06, + "model_type": "intern_vit_6b", + "moe_coeff_ratio": 0.5, + "moe_intermediate_size": 768, + "moe_output_scale": 4.0, + "noisy_gate_policy": "RSample_before", + "norm_type": "layer_norm", + "num_attention_heads": 16, + "num_channels": 3, + "num_experts": 8, + "num_hidden_layers": 24, + "num_routed_experts": 4, + "num_shared_experts": 4, + "patch_size": 14, + "qk_normalization": false, + "qkv_bias": true, + "shared_expert_intermediate_size": 3072, + "use_bfloat16": true, + "use_flash_attn": true, + "use_moe": false, + "use_residual": true, + "use_rts": false, + "use_weighted_residual": false + } +} diff --git a/lisa-ivl3-2b_nr3_122mlp2_2_mseadl_vlorati_sr/ckpt_model/model.safetensors b/lisa-ivl3-2b_nr3_122mlp2_2_mseadl_vlorati_sr/ckpt_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..61e1843fcd3088fd73e5ab8c2a8cc13f4b45870c --- /dev/null +++ b/lisa-ivl3-2b_nr3_122mlp2_2_mseadl_vlorati_sr/ckpt_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b77653240e7d3d001c6980523a59b6ea98f6bc91603f853435b2fd32aa7e4e3 +size 4244119544 diff --git a/lisa-ivl3-2b_nr3_122mlp2_2_mseadl_vlorati_sr/ckpt_model/training_args.bin b/lisa-ivl3-2b_nr3_122mlp2_2_mseadl_vlorati_sr/ckpt_model/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..2a199fa5fa6526251a7619709e4ba4b06ca2a480 --- /dev/null +++ b/lisa-ivl3-2b_nr3_122mlp2_2_mseadl_vlorati_sr/ckpt_model/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df8396738da9c77b7e91355090918929b20e2e8219f5b9f0afc6e84931c65e41 +size 7416 diff --git a/lisa-ivl3-2b_nr3_122mlp2_2_mseadl_vlorati_sr/evaluation_metrics.json b/lisa-ivl3-2b_nr3_122mlp2_2_mseadl_vlorati_sr/evaluation_metrics.json new file mode 100644 index 0000000000000000000000000000000000000000..fed8b523d78021caf0933aa4dc8f38a791390afc --- /dev/null +++ b/lisa-ivl3-2b_nr3_122mlp2_2_mseadl_vlorati_sr/evaluation_metrics.json @@ -0,0 +1,116 @@ +[ + { + "val_dataset": "ReasonSeg|val", + "epoch": 1.0, + "eval_giou": 0.5415239334106445, + "eval_ciou": 0.6206911206245422 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 2.0, + "eval_giou": 0.5743964910507202, + "eval_ciou": 0.640878438949585 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 3.0, + "eval_giou": 0.580619752407074, + "eval_ciou": 0.6527875661849976 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 4.0, + "eval_giou": 0.5879706740379333, + "eval_ciou": 0.6955618262290955 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 5.0, + "eval_giou": 0.5921798348426819, + "eval_ciou": 0.6570607423782349 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 6.0, + "eval_giou": 0.6193594932556152, + "eval_ciou": 0.7253115177154541 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 7.0, + "eval_giou": 0.6144120097160339, + "eval_ciou": 0.7175951600074768 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 8.0, + "eval_giou": 0.6203106045722961, + "eval_ciou": 0.7243441343307495 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 9.0, + "eval_giou": 0.6189948320388794, + "eval_ciou": 0.6763254404067993 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 10.0, + "eval_giou": 0.6205965876579285, + "eval_ciou": 0.6701367497444153 + }, + { + "val_dataset": "ReasonSeg|test", + "epoch": 10.0, + "eval_giou": 0.6071184873580933, + "eval_ciou": 0.6335629224777222 + }, + { + "val_dataset": "refcoco|unc|val", + "epoch": 10.0, + "eval_giou": 0.7885820865631104, + "eval_ciou": 0.7900774478912354 + }, + { + "val_dataset": "refcoco|unc|testA", + "epoch": 10.0, + "eval_giou": 0.8104898929595947, + "eval_ciou": 0.8145949244499207 + }, + { + "val_dataset": "refcoco|unc|testB", + "epoch": 10.0, + "eval_giou": 0.7596479654312134, + "eval_ciou": 0.7567470073699951 + }, + { + "val_dataset": "refcoco+|unc|val", + "epoch": 10.0, + "eval_giou": 0.7392013669013977, + "eval_ciou": 0.732312798500061 + }, + { + "val_dataset": "refcoco+|unc|testA", + "epoch": 10.0, + "eval_giou": 0.7797172665596008, + "eval_ciou": 0.7783055305480957 + }, + { + "val_dataset": "refcoco+|unc|testB", + "epoch": 10.0, + "eval_giou": 0.69083571434021, + "eval_ciou": 0.6764463782310486 + }, + { + "val_dataset": "refcocog|umd|test", + "epoch": 10.0, + "eval_giou": 0.7492813467979431, + "eval_ciou": 0.7576610445976257 + }, + { + "val_dataset": "refcocog|umd|val", + "epoch": 10.0, + "eval_giou": 0.7473326921463013, + "eval_ciou": 0.750547468662262 + } +] \ No newline at end of file diff --git a/lisa-ivl3-2b_nr3_122mlp2_2_mseadl_vlorati_sr/events.out.tfevents.1759486815.bask-pg0309u06a.2277808.0 b/lisa-ivl3-2b_nr3_122mlp2_2_mseadl_vlorati_sr/events.out.tfevents.1759486815.bask-pg0309u06a.2277808.0 new file mode 100644 index 0000000000000000000000000000000000000000..72b1fba6a9ec7796c010171169be07ae201e2510 --- /dev/null +++ b/lisa-ivl3-2b_nr3_122mlp2_2_mseadl_vlorati_sr/events.out.tfevents.1759486815.bask-pg0309u06a.2277808.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb82af37e612cd6cfc6d0d054a2534a5462eb81e4e30b70b01c92044eaaf135d +size 212352 diff --git a/lisa-ivl3-2b_nr3_122mlp2_2_mseadl_vlorati_sr/runs/Oct03_11-20-12_bask-pg0309u06a/events.out.tfevents.1759486888.bask-pg0309u06a.2277808.1 b/lisa-ivl3-2b_nr3_122mlp2_2_mseadl_vlorati_sr/runs/Oct03_11-20-12_bask-pg0309u06a/events.out.tfevents.1759486888.bask-pg0309u06a.2277808.1 new file mode 100644 index 0000000000000000000000000000000000000000..cc23ee1290bbd6c88b645219ee7a77ff7d6e4cd5 --- /dev/null +++ b/lisa-ivl3-2b_nr3_122mlp2_2_mseadl_vlorati_sr/runs/Oct03_11-20-12_bask-pg0309u06a/events.out.tfevents.1759486888.bask-pg0309u06a.2277808.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6bbff2a13f642b08d52676e96a7169705cff17fa0616aa72d9776bdaf2fe76c +size 116433 diff --git a/lisa-ivl3-2b_nr3_122mlp2_2_mseadl_vlorati_sr/runs/Oct03_11-20-12_bask-pg0309u06a/events.out.tfevents.1759538005.bask-pg0309u06a.2277808.2 b/lisa-ivl3-2b_nr3_122mlp2_2_mseadl_vlorati_sr/runs/Oct03_11-20-12_bask-pg0309u06a/events.out.tfevents.1759538005.bask-pg0309u06a.2277808.2 new file mode 100644 index 0000000000000000000000000000000000000000..ac7ffaabb049f18cb2e1f84759ed3499833ed291 --- /dev/null +++ b/lisa-ivl3-2b_nr3_122mlp2_2_mseadl_vlorati_sr/runs/Oct03_11-20-12_bask-pg0309u06a/events.out.tfevents.1759538005.bask-pg0309u06a.2277808.2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ab06ee6d212cf0391bd6df3ce597bde4bb8886408a297cb2722f68ff61894c1 +size 1402 diff --git a/lisa-ivl3-2b_nr3_122mlp2_2_vlorati_sr/ckpt_model/config.json b/lisa-ivl3-2b_nr3_122mlp2_2_vlorati_sr/ckpt_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1aaa0efe9e975d906488d234e8f6ec309c18056c --- /dev/null +++ b/lisa-ivl3-2b_nr3_122mlp2_2_vlorati_sr/ckpt_model/config.json @@ -0,0 +1,143 @@ +{ + "architectures": [ + "InternVL3Self" + ], + "auto_map": { + "AutoConfig": "configuration_internvl_chat.InternVLChatConfig", + "AutoModel": "modeling_internvl_chat.InternVLChatModel", + "AutoModelForCausalLM": "modeling_internvl_chat.InternVLChatModel" + }, + "downsample_ratio": 0.5, + "dtype": "bfloat16", + "dynamic_image_size": true, + "eos_token_id": 151645, + "force_image_size": 448, + "hidden_size": 1536, + "image_fold": null, + "llm_config": { + "_attn_implementation_autoset": true, + "_name_or_path": "./pretrained/Qwen2.5-32B-Instruct", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 151643, + "dtype": "bfloat16", + "eos_token_id": 151643, + "hidden_act": "silu", + "hidden_size": 1536, + "initializer_range": 0.02, + "intermediate_size": 8960, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 70, + "model_type": "qwen2", + "moe_config": null, + "num_attention_heads": 12, + "num_hidden_layers": 28, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 2.0, + "rope_type": "dynamic", + "type": "dynamic" + }, + "rope_theta": 1000000.0, + "sliding_window": null, + "use_bfloat16": true, + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 151676 + }, + "max_dynamic_patch": 12, + "min_dynamic_patch": 1, + "model_type": "internvl_chat", + "output_attentions": false, + "pad2square": false, + "pad_token_id": 151643, + "ps_version": "v2", + "select_layer": -1, + "system_message": null, + "template": "internvl2_5", + "tie_word_embeddings": false, + "transformers_version": null, + "use_backbone_lora": 0, + "use_llm_lora": 0, + "use_thumbnail": true, + "vision_config": { + "_attn_implementation_autoset": true, + "_name_or_path": "OpenGVLab/InternViT-6B-448px-V1-5", + "architectures": [ + "InternVisionModel" + ], + "attention_dropout": 0.0, + "auto_map": { + "AutoConfig": "configuration_intern_vit.InternVisionConfig", + "AutoModel": "modeling_intern_vit.InternVisionModel" + }, + "capacity_factor": 1.2, + "drop_path_rate": 0.1, + "dropout": 0.0, + "dtype": "bfloat16", + "eval_capacity_factor": 1.4, + "hidden_act": "gelu", + "hidden_size": 1024, + "image_size": 448, + "initializer_factor": 0.1, + "initializer_range": 1e-10, + "intermediate_size": 4096, + "laux_allreduce": "all_nodes", + "layer_norm_eps": 1e-06, + "model_type": "intern_vit_6b", + "moe_coeff_ratio": 0.5, + "moe_intermediate_size": 768, + "moe_output_scale": 4.0, + "noisy_gate_policy": "RSample_before", + "norm_type": "layer_norm", + "num_attention_heads": 16, + "num_channels": 3, + "num_experts": 8, + "num_hidden_layers": 24, + "num_routed_experts": 4, + "num_shared_experts": 4, + "patch_size": 14, + "qk_normalization": false, + "qkv_bias": true, + "shared_expert_intermediate_size": 3072, + "use_bfloat16": true, + "use_flash_attn": true, + "use_moe": false, + "use_residual": true, + "use_rts": false, + "use_weighted_residual": false + } +} diff --git a/lisa-ivl3-2b_nr3_122mlp2_2_vlorati_sr/ckpt_model/model.safetensors b/lisa-ivl3-2b_nr3_122mlp2_2_vlorati_sr/ckpt_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..95aff1f4679d8d7d406e0263da3aa5ddaf9b31d1 --- /dev/null +++ b/lisa-ivl3-2b_nr3_122mlp2_2_vlorati_sr/ckpt_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8c7487a1fccff1d9687435d89efd329cdb3e399823838852e62656661ca6dc5 +size 4244119544 diff --git a/lisa-ivl3-2b_nr3_122mlp2_2_vlorati_sr/ckpt_model/training_args.bin b/lisa-ivl3-2b_nr3_122mlp2_2_vlorati_sr/ckpt_model/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..2862142c2eeb44a7a73a033663c432b7bd3155bc --- /dev/null +++ b/lisa-ivl3-2b_nr3_122mlp2_2_vlorati_sr/ckpt_model/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:135e07bc9bec7d62d4b45522492ea0547d3e4690f9d3e5c3b109d9bbfbe478dd +size 7416 diff --git a/lisa-ivl3-2b_nr3_122mlp2_2_vlorati_sr/evaluation_metrics.json b/lisa-ivl3-2b_nr3_122mlp2_2_vlorati_sr/evaluation_metrics.json new file mode 100644 index 0000000000000000000000000000000000000000..751d68fc3ed951765c4816b45c4a35d7d76ad85f --- /dev/null +++ b/lisa-ivl3-2b_nr3_122mlp2_2_vlorati_sr/evaluation_metrics.json @@ -0,0 +1,116 @@ +[ + { + "val_dataset": "ReasonSeg|val", + "epoch": 1.0, + "eval_giou": 0.5482708811759949, + "eval_ciou": 0.6468331813812256 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 2.0, + "eval_giou": 0.5750036835670471, + "eval_ciou": 0.6669376492500305 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 3.0, + "eval_giou": 0.5921468138694763, + "eval_ciou": 0.629406213760376 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 4.0, + "eval_giou": 0.5884107351303101, + "eval_ciou": 0.6435810327529907 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 5.0, + "eval_giou": 0.6055757403373718, + "eval_ciou": 0.6697606444358826 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 6.0, + "eval_giou": 0.6113269925117493, + "eval_ciou": 0.6145585775375366 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 7.0, + "eval_giou": 0.6080509424209595, + "eval_ciou": 0.6102546453475952 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 8.0, + "eval_giou": 0.6088388562202454, + "eval_ciou": 0.6153869032859802 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 9.0, + "eval_giou": 0.6231482028961182, + "eval_ciou": 0.6567613482475281 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 10.0, + "eval_giou": 0.6252795457839966, + "eval_ciou": 0.6559604406356812 + }, + { + "val_dataset": "ReasonSeg|test", + "epoch": 10.0, + "eval_giou": 0.6127145886421204, + "eval_ciou": 0.6343584060668945 + }, + { + "val_dataset": "refcoco|unc|val", + "epoch": 10.0, + "eval_giou": 0.7878564596176147, + "eval_ciou": 0.7911859750747681 + }, + { + "val_dataset": "refcoco|unc|testA", + "epoch": 10.0, + "eval_giou": 0.8091955184936523, + "eval_ciou": 0.8126869797706604 + }, + { + "val_dataset": "refcoco|unc|testB", + "epoch": 10.0, + "eval_giou": 0.7623993754386902, + "eval_ciou": 0.7634398937225342 + }, + { + "val_dataset": "refcoco+|unc|val", + "epoch": 10.0, + "eval_giou": 0.7356936931610107, + "eval_ciou": 0.727637767791748 + }, + { + "val_dataset": "refcoco+|unc|testA", + "epoch": 10.0, + "eval_giou": 0.7813608646392822, + "eval_ciou": 0.7805576920509338 + }, + { + "val_dataset": "refcoco+|unc|testB", + "epoch": 10.0, + "eval_giou": 0.6834573149681091, + "eval_ciou": 0.6728175282478333 + }, + { + "val_dataset": "refcocog|umd|test", + "epoch": 10.0, + "eval_giou": 0.7519955635070801, + "eval_ciou": 0.7603971362113953 + }, + { + "val_dataset": "refcocog|umd|val", + "epoch": 10.0, + "eval_giou": 0.7473122477531433, + "eval_ciou": 0.7522236108779907 + } +] \ No newline at end of file diff --git a/lisa-ivl3-2b_nr3_122mlp2_2_vlorati_sr/events.out.tfevents.1759351297.bask-pg0308u03a.3036902.0 b/lisa-ivl3-2b_nr3_122mlp2_2_vlorati_sr/events.out.tfevents.1759351297.bask-pg0308u03a.3036902.0 new file mode 100644 index 0000000000000000000000000000000000000000..9a30abd44b235db22ba5878cc26febc42bbb0ea4 --- /dev/null +++ b/lisa-ivl3-2b_nr3_122mlp2_2_vlorati_sr/events.out.tfevents.1759351297.bask-pg0308u03a.3036902.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b51fce08e1ffb5975b10935d1f9446ac6fabbf831303c335db6638a8a32c168c +size 212352 diff --git a/lisa-ivl3-2b_nr3_122mlp2_2_vlorati_sr/runs/Oct01_21-41-34_bask-pg0308u03a/events.out.tfevents.1759351378.bask-pg0308u03a.3036902.1 b/lisa-ivl3-2b_nr3_122mlp2_2_vlorati_sr/runs/Oct01_21-41-34_bask-pg0308u03a/events.out.tfevents.1759351378.bask-pg0308u03a.3036902.1 new file mode 100644 index 0000000000000000000000000000000000000000..2d6779ab0b6c2c938cdd47910d4f7b9b5fa415a3 --- /dev/null +++ b/lisa-ivl3-2b_nr3_122mlp2_2_vlorati_sr/runs/Oct01_21-41-34_bask-pg0308u03a/events.out.tfevents.1759351378.bask-pg0308u03a.3036902.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:334518fd19c012ef3a33c688fb32c1281ebb5e1ba753fe68b30a268aa040c2b6 +size 116416 diff --git a/lisa-ivl3-2b_nr3_122mlp2_2_vlorati_sr/runs/Oct01_21-41-34_bask-pg0308u03a/events.out.tfevents.1759378373.bask-pg0308u03a.3036902.2 b/lisa-ivl3-2b_nr3_122mlp2_2_vlorati_sr/runs/Oct01_21-41-34_bask-pg0308u03a/events.out.tfevents.1759378373.bask-pg0308u03a.3036902.2 new file mode 100644 index 0000000000000000000000000000000000000000..73ef4bc220f7ef0da1f00568914f544e223c502e --- /dev/null +++ b/lisa-ivl3-2b_nr3_122mlp2_2_vlorati_sr/runs/Oct01_21-41-34_bask-pg0308u03a/events.out.tfevents.1759378373.bask-pg0308u03a.3036902.2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e8b57492a8f1cd04f65a9109b32a73f94083d208e902a9c43debf2d892fbdf4 +size 1402 diff --git a/lisa-ivl3-2b_nr3_122mlp2_2_vlorati_sr_1/ckpt_model/config.json b/lisa-ivl3-2b_nr3_122mlp2_2_vlorati_sr_1/ckpt_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1aaa0efe9e975d906488d234e8f6ec309c18056c --- /dev/null +++ b/lisa-ivl3-2b_nr3_122mlp2_2_vlorati_sr_1/ckpt_model/config.json @@ -0,0 +1,143 @@ +{ + "architectures": [ + "InternVL3Self" + ], + "auto_map": { + "AutoConfig": "configuration_internvl_chat.InternVLChatConfig", + "AutoModel": "modeling_internvl_chat.InternVLChatModel", + "AutoModelForCausalLM": "modeling_internvl_chat.InternVLChatModel" + }, + "downsample_ratio": 0.5, + "dtype": "bfloat16", + "dynamic_image_size": true, + "eos_token_id": 151645, + "force_image_size": 448, + "hidden_size": 1536, + "image_fold": null, + "llm_config": { + "_attn_implementation_autoset": true, + "_name_or_path": "./pretrained/Qwen2.5-32B-Instruct", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 151643, + "dtype": "bfloat16", + "eos_token_id": 151643, + "hidden_act": "silu", + "hidden_size": 1536, + "initializer_range": 0.02, + "intermediate_size": 8960, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 70, + "model_type": "qwen2", + "moe_config": null, + "num_attention_heads": 12, + "num_hidden_layers": 28, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 2.0, + "rope_type": "dynamic", + "type": "dynamic" + }, + "rope_theta": 1000000.0, + "sliding_window": null, + "use_bfloat16": true, + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 151676 + }, + "max_dynamic_patch": 12, + "min_dynamic_patch": 1, + "model_type": "internvl_chat", + "output_attentions": false, + "pad2square": false, + "pad_token_id": 151643, + "ps_version": "v2", + "select_layer": -1, + "system_message": null, + "template": "internvl2_5", + "tie_word_embeddings": false, + "transformers_version": null, + "use_backbone_lora": 0, + "use_llm_lora": 0, + "use_thumbnail": true, + "vision_config": { + "_attn_implementation_autoset": true, + "_name_or_path": "OpenGVLab/InternViT-6B-448px-V1-5", + "architectures": [ + "InternVisionModel" + ], + "attention_dropout": 0.0, + "auto_map": { + "AutoConfig": "configuration_intern_vit.InternVisionConfig", + "AutoModel": "modeling_intern_vit.InternVisionModel" + }, + "capacity_factor": 1.2, + "drop_path_rate": 0.1, + "dropout": 0.0, + "dtype": "bfloat16", + "eval_capacity_factor": 1.4, + "hidden_act": "gelu", + "hidden_size": 1024, + "image_size": 448, + "initializer_factor": 0.1, + "initializer_range": 1e-10, + "intermediate_size": 4096, + "laux_allreduce": "all_nodes", + "layer_norm_eps": 1e-06, + "model_type": "intern_vit_6b", + "moe_coeff_ratio": 0.5, + "moe_intermediate_size": 768, + "moe_output_scale": 4.0, + "noisy_gate_policy": "RSample_before", + "norm_type": "layer_norm", + "num_attention_heads": 16, + "num_channels": 3, + "num_experts": 8, + "num_hidden_layers": 24, + "num_routed_experts": 4, + "num_shared_experts": 4, + "patch_size": 14, + "qk_normalization": false, + "qkv_bias": true, + "shared_expert_intermediate_size": 3072, + "use_bfloat16": true, + "use_flash_attn": true, + "use_moe": false, + "use_residual": true, + "use_rts": false, + "use_weighted_residual": false + } +} diff --git a/lisa-ivl3-2b_nr3_122mlp2_2_vlorati_sr_1/ckpt_model/model.safetensors b/lisa-ivl3-2b_nr3_122mlp2_2_vlorati_sr_1/ckpt_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..05c18cbd70b6c818f698945ddb731a77dc60917e --- /dev/null +++ b/lisa-ivl3-2b_nr3_122mlp2_2_vlorati_sr_1/ckpt_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87d896972d05b3d279b14f376ee95d2816ad7020e1d35b664cd4b8b83df33d29 +size 4244119544 diff --git a/lisa-ivl3-2b_nr3_122mlp2_2_vlorati_sr_1/ckpt_model/training_args.bin b/lisa-ivl3-2b_nr3_122mlp2_2_vlorati_sr_1/ckpt_model/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..e9be264b7827d6592c5e028f6e9c2e468b18e3c6 --- /dev/null +++ b/lisa-ivl3-2b_nr3_122mlp2_2_vlorati_sr_1/ckpt_model/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c142103205f3efbe4d21cc49e129ffab163db202754f174b4714a6ce3881609a +size 7416 diff --git a/lisa-ivl3-2b_nr3_122mlp2_2_vlorati_sr_1/evaluation_metrics.json b/lisa-ivl3-2b_nr3_122mlp2_2_vlorati_sr_1/evaluation_metrics.json new file mode 100644 index 0000000000000000000000000000000000000000..c3eaf0ccbb3be9c7a30d990942db4e7f169f6f57 --- /dev/null +++ b/lisa-ivl3-2b_nr3_122mlp2_2_vlorati_sr_1/evaluation_metrics.json @@ -0,0 +1,116 @@ +[ + { + "val_dataset": "ReasonSeg|val", + "epoch": 1.0, + "eval_giou": 0.5589579939842224, + "eval_ciou": 0.5999404191970825 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 2.0, + "eval_giou": 0.5726032257080078, + "eval_ciou": 0.6167250275611877 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 3.0, + "eval_giou": 0.5848990678787231, + "eval_ciou": 0.6211679577827454 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 4.0, + "eval_giou": 0.5943959355354309, + "eval_ciou": 0.5796595215797424 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 5.0, + "eval_giou": 0.6058216094970703, + "eval_ciou": 0.6714942455291748 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 6.0, + "eval_giou": 0.6229856610298157, + "eval_ciou": 0.6765292882919312 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 7.0, + "eval_giou": 0.627244770526886, + "eval_ciou": 0.7156919240951538 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 8.0, + "eval_giou": 0.6339113712310791, + "eval_ciou": 0.704724133014679 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 9.0, + "eval_giou": 0.6255011558532715, + "eval_ciou": 0.7015580534934998 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 10.0, + "eval_giou": 0.6303748488426208, + "eval_ciou": 0.7075340747833252 + }, + { + "val_dataset": "ReasonSeg|test", + "epoch": 10.0, + "eval_giou": 0.6115955114364624, + "eval_ciou": 0.638154149055481 + }, + { + "val_dataset": "refcoco|unc|val", + "epoch": 10.0, + "eval_giou": 0.7903459668159485, + "eval_ciou": 0.794349730014801 + }, + { + "val_dataset": "refcoco|unc|testA", + "epoch": 10.0, + "eval_giou": 0.8083787560462952, + "eval_ciou": 0.8097182512283325 + }, + { + "val_dataset": "refcoco|unc|testB", + "epoch": 10.0, + "eval_giou": 0.7646052837371826, + "eval_ciou": 0.761031985282898 + }, + { + "val_dataset": "refcoco+|unc|val", + "epoch": 10.0, + "eval_giou": 0.740681529045105, + "eval_ciou": 0.733224630355835 + }, + { + "val_dataset": "refcoco+|unc|testA", + "epoch": 10.0, + "eval_giou": 0.7811499834060669, + "eval_ciou": 0.7809284329414368 + }, + { + "val_dataset": "refcoco+|unc|testB", + "epoch": 10.0, + "eval_giou": 0.6932782530784607, + "eval_ciou": 0.677294135093689 + }, + { + "val_dataset": "refcocog|umd|test", + "epoch": 10.0, + "eval_giou": 0.7504410147666931, + "eval_ciou": 0.7573921084403992 + }, + { + "val_dataset": "refcocog|umd|val", + "epoch": 10.0, + "eval_giou": 0.7493169903755188, + "eval_ciou": 0.7562484741210938 + } +] \ No newline at end of file diff --git a/lisa-ivl3-2b_nr3_122mlp2_2_vlorati_sr_1/events.out.tfevents.1759429687.bask-pg0308u03a.119085.0 b/lisa-ivl3-2b_nr3_122mlp2_2_vlorati_sr_1/events.out.tfevents.1759429687.bask-pg0308u03a.119085.0 new file mode 100644 index 0000000000000000000000000000000000000000..9fab1e619d974aac4e7119477ff0fcc0332789b8 --- /dev/null +++ b/lisa-ivl3-2b_nr3_122mlp2_2_vlorati_sr_1/events.out.tfevents.1759429687.bask-pg0308u03a.119085.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7662702e0a519cb890b78790743f12c5cefe80366fe28b206d33fc738f4ed622 +size 88 diff --git a/lisa-ivl3-2b_nr3_122mlp2_2_vlorati_sr_1/events.out.tfevents.1759429964.bask-pg0308u03a.127523.0 b/lisa-ivl3-2b_nr3_122mlp2_2_vlorati_sr_1/events.out.tfevents.1759429964.bask-pg0308u03a.127523.0 new file mode 100644 index 0000000000000000000000000000000000000000..74bc89a3527d937c0288db9baa35936f77691c6f --- /dev/null +++ b/lisa-ivl3-2b_nr3_122mlp2_2_vlorati_sr_1/events.out.tfevents.1759429964.bask-pg0308u03a.127523.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2299cc221f2da8c72b5fc45ae6c1e076922f16cd94a4faf33fc7dc51893e5b5e +size 88 diff --git a/lisa-ivl3-2b_nr3_122mlp2_2_vlorati_sr_1/events.out.tfevents.1759430104.bask-pg0308u03a.131156.0 b/lisa-ivl3-2b_nr3_122mlp2_2_vlorati_sr_1/events.out.tfevents.1759430104.bask-pg0308u03a.131156.0 new file mode 100644 index 0000000000000000000000000000000000000000..4e4305d842dc3dd6c5a801e1bde2b2a871f27a4a --- /dev/null +++ b/lisa-ivl3-2b_nr3_122mlp2_2_vlorati_sr_1/events.out.tfevents.1759430104.bask-pg0308u03a.131156.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b6a2dbf262e12c2dd3547fd0f480e49a2f488777f63d4749735b546ad6c8f67 +size 88 diff --git a/lisa-ivl3-2b_nr3_122mlp2_2_vlorati_sr_1/events.out.tfevents.1759430274.bask-pg0308u03a.134987.0 b/lisa-ivl3-2b_nr3_122mlp2_2_vlorati_sr_1/events.out.tfevents.1759430274.bask-pg0308u03a.134987.0 new file mode 100644 index 0000000000000000000000000000000000000000..aafb97a549cacac31b8a66bab7af4165702b9336 --- /dev/null +++ b/lisa-ivl3-2b_nr3_122mlp2_2_vlorati_sr_1/events.out.tfevents.1759430274.bask-pg0308u03a.134987.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ae96dab61e4e7919227517c4fd1b43335c38ccf9135afbe43e9f48ae4fd1b63 +size 212352 diff --git a/lisa-ivl3-2b_nr3_122mlp2_2_vlorati_sr_1/runs/Oct02_19-28-04_bask-pg0308u03a/events.out.tfevents.1759429772.bask-pg0308u03a.119085.1 b/lisa-ivl3-2b_nr3_122mlp2_2_vlorati_sr_1/runs/Oct02_19-28-04_bask-pg0308u03a/events.out.tfevents.1759429772.bask-pg0308u03a.119085.1 new file mode 100644 index 0000000000000000000000000000000000000000..583f52816c12453edc1f8213223ce9ac2605980e --- /dev/null +++ b/lisa-ivl3-2b_nr3_122mlp2_2_vlorati_sr_1/runs/Oct02_19-28-04_bask-pg0308u03a/events.out.tfevents.1759429772.bask-pg0308u03a.119085.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d05f62c58219be72635e3e0e2f74ce566b4de21e46f52d21ecf29f5949a85da +size 9157 diff --git a/lisa-ivl3-2b_nr3_122mlp2_2_vlorati_sr_1/runs/Oct02_19-32-42_bask-pg0308u03a/events.out.tfevents.1759430033.bask-pg0308u03a.127523.1 b/lisa-ivl3-2b_nr3_122mlp2_2_vlorati_sr_1/runs/Oct02_19-32-42_bask-pg0308u03a/events.out.tfevents.1759430033.bask-pg0308u03a.127523.1 new file mode 100644 index 0000000000000000000000000000000000000000..b6bc9c63c280daa9619f828efa4602c4b4b4c88c --- /dev/null +++ b/lisa-ivl3-2b_nr3_122mlp2_2_vlorati_sr_1/runs/Oct02_19-32-42_bask-pg0308u03a/events.out.tfevents.1759430033.bask-pg0308u03a.127523.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:abea445d59a055c3810f45ec8142c767396c225935620b8a244eed1e07846b41 +size 9157 diff --git a/lisa-ivl3-2b_nr3_122mlp2_2_vlorati_sr_1/runs/Oct02_19-35-01_bask-pg0308u03a/events.out.tfevents.1759430171.bask-pg0308u03a.131156.1 b/lisa-ivl3-2b_nr3_122mlp2_2_vlorati_sr_1/runs/Oct02_19-35-01_bask-pg0308u03a/events.out.tfevents.1759430171.bask-pg0308u03a.131156.1 new file mode 100644 index 0000000000000000000000000000000000000000..937ef24d10d057a89e400cd989f3610962bd7682 --- /dev/null +++ b/lisa-ivl3-2b_nr3_122mlp2_2_vlorati_sr_1/runs/Oct02_19-35-01_bask-pg0308u03a/events.out.tfevents.1759430171.bask-pg0308u03a.131156.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:672bed56d2ca1fbc1b7c7751fc92bef562d0b2ed1fd8c165684eac3aa02f7317 +size 9157 diff --git a/lisa-ivl3-2b_nr3_122mlp2_2_vlorati_sr_1/runs/Oct02_19-37-52_bask-pg0308u03a/events.out.tfevents.1759430342.bask-pg0308u03a.134987.1 b/lisa-ivl3-2b_nr3_122mlp2_2_vlorati_sr_1/runs/Oct02_19-37-52_bask-pg0308u03a/events.out.tfevents.1759430342.bask-pg0308u03a.134987.1 new file mode 100644 index 0000000000000000000000000000000000000000..ac113943228349b9d4bfbb91e971cf420ba1ad3a --- /dev/null +++ b/lisa-ivl3-2b_nr3_122mlp2_2_vlorati_sr_1/runs/Oct02_19-37-52_bask-pg0308u03a/events.out.tfevents.1759430342.bask-pg0308u03a.134987.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49607e03b9da784ff521f3114a130f1aa44e42c3db4f41eba9bfd1ff25a7f70c +size 116423 diff --git a/lisa-ivl3-2b_nr3_122mlp2_2_vlorati_sr_1/runs/Oct02_19-37-52_bask-pg0308u03a/events.out.tfevents.1759481524.bask-pg0308u03a.134987.2 b/lisa-ivl3-2b_nr3_122mlp2_2_vlorati_sr_1/runs/Oct02_19-37-52_bask-pg0308u03a/events.out.tfevents.1759481524.bask-pg0308u03a.134987.2 new file mode 100644 index 0000000000000000000000000000000000000000..92ad7f367cce16c1c80283db531ae455ef00a959 --- /dev/null +++ b/lisa-ivl3-2b_nr3_122mlp2_2_vlorati_sr_1/runs/Oct02_19-37-52_bask-pg0308u03a/events.out.tfevents.1759481524.bask-pg0308u03a.134987.2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4e78a1c84aeaba991bd3ccae77de4a1ad0a9c084954d795c3481f18f03ffb0b +size 1402 diff --git a/lisa-ivl3-2b_nr3_122mlp2avg_2_vlorati_sr/ckpt_model/config.json b/lisa-ivl3-2b_nr3_122mlp2avg_2_vlorati_sr/ckpt_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1aaa0efe9e975d906488d234e8f6ec309c18056c --- /dev/null +++ b/lisa-ivl3-2b_nr3_122mlp2avg_2_vlorati_sr/ckpt_model/config.json @@ -0,0 +1,143 @@ +{ + "architectures": [ + "InternVL3Self" + ], + "auto_map": { + "AutoConfig": "configuration_internvl_chat.InternVLChatConfig", + "AutoModel": "modeling_internvl_chat.InternVLChatModel", + "AutoModelForCausalLM": "modeling_internvl_chat.InternVLChatModel" + }, + "downsample_ratio": 0.5, + "dtype": "bfloat16", + "dynamic_image_size": true, + "eos_token_id": 151645, + "force_image_size": 448, + "hidden_size": 1536, + "image_fold": null, + "llm_config": { + "_attn_implementation_autoset": true, + "_name_or_path": "./pretrained/Qwen2.5-32B-Instruct", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 151643, + "dtype": "bfloat16", + "eos_token_id": 151643, + "hidden_act": "silu", + "hidden_size": 1536, + "initializer_range": 0.02, + "intermediate_size": 8960, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 70, + "model_type": "qwen2", + "moe_config": null, + "num_attention_heads": 12, + "num_hidden_layers": 28, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 2.0, + "rope_type": "dynamic", + "type": "dynamic" + }, + "rope_theta": 1000000.0, + "sliding_window": null, + "use_bfloat16": true, + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 151676 + }, + "max_dynamic_patch": 12, + "min_dynamic_patch": 1, + "model_type": "internvl_chat", + "output_attentions": false, + "pad2square": false, + "pad_token_id": 151643, + "ps_version": "v2", + "select_layer": -1, + "system_message": null, + "template": "internvl2_5", + "tie_word_embeddings": false, + "transformers_version": null, + "use_backbone_lora": 0, + "use_llm_lora": 0, + "use_thumbnail": true, + "vision_config": { + "_attn_implementation_autoset": true, + "_name_or_path": "OpenGVLab/InternViT-6B-448px-V1-5", + "architectures": [ + "InternVisionModel" + ], + "attention_dropout": 0.0, + "auto_map": { + "AutoConfig": "configuration_intern_vit.InternVisionConfig", + "AutoModel": "modeling_intern_vit.InternVisionModel" + }, + "capacity_factor": 1.2, + "drop_path_rate": 0.1, + "dropout": 0.0, + "dtype": "bfloat16", + "eval_capacity_factor": 1.4, + "hidden_act": "gelu", + "hidden_size": 1024, + "image_size": 448, + "initializer_factor": 0.1, + "initializer_range": 1e-10, + "intermediate_size": 4096, + "laux_allreduce": "all_nodes", + "layer_norm_eps": 1e-06, + "model_type": "intern_vit_6b", + "moe_coeff_ratio": 0.5, + "moe_intermediate_size": 768, + "moe_output_scale": 4.0, + "noisy_gate_policy": "RSample_before", + "norm_type": "layer_norm", + "num_attention_heads": 16, + "num_channels": 3, + "num_experts": 8, + "num_hidden_layers": 24, + "num_routed_experts": 4, + "num_shared_experts": 4, + "patch_size": 14, + "qk_normalization": false, + "qkv_bias": true, + "shared_expert_intermediate_size": 3072, + "use_bfloat16": true, + "use_flash_attn": true, + "use_moe": false, + "use_residual": true, + "use_rts": false, + "use_weighted_residual": false + } +} diff --git a/lisa-ivl3-2b_nr3_122mlp2avg_2_vlorati_sr/ckpt_model/model.safetensors b/lisa-ivl3-2b_nr3_122mlp2avg_2_vlorati_sr/ckpt_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d7a65869a442d0d7b632d93aa942a8ab5575cab6 --- /dev/null +++ b/lisa-ivl3-2b_nr3_122mlp2avg_2_vlorati_sr/ckpt_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53172d7ca789e0f5683d141106dda479b99835f8ea3288d9b237f47b6ccaf49c +size 4244119544 diff --git a/lisa-ivl3-2b_nr3_122mlp2avg_2_vlorati_sr/ckpt_model/training_args.bin b/lisa-ivl3-2b_nr3_122mlp2avg_2_vlorati_sr/ckpt_model/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..de3e7ae5221e122d0278251cbc5ea2f37e75498a --- /dev/null +++ b/lisa-ivl3-2b_nr3_122mlp2avg_2_vlorati_sr/ckpt_model/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a163b2dfde7c851c72a2f2d252cfa6cf3e6bfde175b42c3319b6f550599dfa1f +size 7416 diff --git a/lisa-ivl3-2b_nr3_122mlp2avg_2_vlorati_sr/evaluation_metrics.json b/lisa-ivl3-2b_nr3_122mlp2avg_2_vlorati_sr/evaluation_metrics.json new file mode 100644 index 0000000000000000000000000000000000000000..d5c4d4b9d6e740d2a7ca96ce25a3ed958cab7376 --- /dev/null +++ b/lisa-ivl3-2b_nr3_122mlp2avg_2_vlorati_sr/evaluation_metrics.json @@ -0,0 +1,116 @@ +[ + { + "val_dataset": "ReasonSeg|val", + "epoch": 1.0, + "eval_giou": 0.5552669167518616, + "eval_ciou": 0.6206966638565063 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 2.0, + "eval_giou": 0.5420923233032227, + "eval_ciou": 0.6395506262779236 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 3.0, + "eval_giou": 0.5858548283576965, + "eval_ciou": 0.6454641819000244 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 4.0, + "eval_giou": 0.5864967107772827, + "eval_ciou": 0.6672537922859192 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 5.0, + "eval_giou": 0.5910743474960327, + "eval_ciou": 0.6674425005912781 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 6.0, + "eval_giou": 0.6063666343688965, + "eval_ciou": 0.6104642748832703 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 7.0, + "eval_giou": 0.6158291697502136, + "eval_ciou": 0.7000030875205994 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 8.0, + "eval_giou": 0.6081036925315857, + "eval_ciou": 0.6879830956459045 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 9.0, + "eval_giou": 0.6207685470581055, + "eval_ciou": 0.6959645748138428 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 10.0, + "eval_giou": 0.6238753795623779, + "eval_ciou": 0.6974939107894897 + }, + { + "val_dataset": "ReasonSeg|test", + "epoch": 10.0, + "eval_giou": 0.6013082265853882, + "eval_ciou": 0.6248176097869873 + }, + { + "val_dataset": "refcoco|unc|val", + "epoch": 10.0, + "eval_giou": 0.7840879559516907, + "eval_ciou": 0.7902311086654663 + }, + { + "val_dataset": "refcoco|unc|testA", + "epoch": 10.0, + "eval_giou": 0.8051392436027527, + "eval_ciou": 0.8104892373085022 + }, + { + "val_dataset": "refcoco|unc|testB", + "epoch": 10.0, + "eval_giou": 0.7589833736419678, + "eval_ciou": 0.762730062007904 + }, + { + "val_dataset": "refcoco+|unc|val", + "epoch": 10.0, + "eval_giou": 0.7358004450798035, + "eval_ciou": 0.7301525473594666 + }, + { + "val_dataset": "refcoco+|unc|testA", + "epoch": 10.0, + "eval_giou": 0.7796850204467773, + "eval_ciou": 0.7789468169212341 + }, + { + "val_dataset": "refcoco+|unc|testB", + "epoch": 10.0, + "eval_giou": 0.6807346343994141, + "eval_ciou": 0.6706116795539856 + }, + { + "val_dataset": "refcocog|umd|test", + "epoch": 10.0, + "eval_giou": 0.7458620667457581, + "eval_ciou": 0.7587509751319885 + }, + { + "val_dataset": "refcocog|umd|val", + "epoch": 10.0, + "eval_giou": 0.7427314519882202, + "eval_ciou": 0.7503473162651062 + } +] \ No newline at end of file diff --git a/lisa-ivl3-2b_nr3_122mlp2avg_2_vlorati_sr/events.out.tfevents.1759395713.bask-pg0309u05a.3185162.0 b/lisa-ivl3-2b_nr3_122mlp2avg_2_vlorati_sr/events.out.tfevents.1759395713.bask-pg0309u05a.3185162.0 new file mode 100644 index 0000000000000000000000000000000000000000..69566058a8a2eb5a202037c87fbaa1348f420a0b --- /dev/null +++ b/lisa-ivl3-2b_nr3_122mlp2avg_2_vlorati_sr/events.out.tfevents.1759395713.bask-pg0309u05a.3185162.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:731e5b788d8ed4e6535a2c58e723fa1f3b4a17f41e54724011c4d8881e778ec5 +size 88 diff --git a/lisa-ivl3-2b_nr3_122mlp2avg_2_vlorati_sr/events.out.tfevents.1759396381.bask-pg0309u05a.3212477.0 b/lisa-ivl3-2b_nr3_122mlp2avg_2_vlorati_sr/events.out.tfevents.1759396381.bask-pg0309u05a.3212477.0 new file mode 100644 index 0000000000000000000000000000000000000000..c19beeb793156d0d042f94594bdf96644f030d21 --- /dev/null +++ b/lisa-ivl3-2b_nr3_122mlp2avg_2_vlorati_sr/events.out.tfevents.1759396381.bask-pg0309u05a.3212477.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06c9711c346afc2c4413f3566a660886564a98688f43b08d632f8c13e0846b13 +size 88 diff --git a/lisa-ivl3-2b_nr3_122mlp2avg_2_vlorati_sr/events.out.tfevents.1759396610.bask-pg0309u05a.3218674.0 b/lisa-ivl3-2b_nr3_122mlp2avg_2_vlorati_sr/events.out.tfevents.1759396610.bask-pg0309u05a.3218674.0 new file mode 100644 index 0000000000000000000000000000000000000000..1f4550521b5b719ef6bd03ce7a96b43ab2f518e5 --- /dev/null +++ b/lisa-ivl3-2b_nr3_122mlp2avg_2_vlorati_sr/events.out.tfevents.1759396610.bask-pg0309u05a.3218674.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b567df272ade5a246272fd2ab8fa2d348025beb4710ce2d68f03d2b5efcaad2 +size 212352 diff --git a/lisa-ivl3-2b_nr3_122mlp2avg_2_vlorati_sr/runs/Oct02_10-01-49_bask-pg0309u05a/events.out.tfevents.1759395796.bask-pg0309u05a.3185162.1 b/lisa-ivl3-2b_nr3_122mlp2avg_2_vlorati_sr/runs/Oct02_10-01-49_bask-pg0309u05a/events.out.tfevents.1759395796.bask-pg0309u05a.3185162.1 new file mode 100644 index 0000000000000000000000000000000000000000..054f18744c6f2b80cc04440f63f2d03c306fc138 --- /dev/null +++ b/lisa-ivl3-2b_nr3_122mlp2avg_2_vlorati_sr/runs/Oct02_10-01-49_bask-pg0309u05a/events.out.tfevents.1759395796.bask-pg0309u05a.3185162.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ff494c860815a66a0d1e5cb7ed77733985f41abe1edfd864240dc565b157c57 +size 9156 diff --git a/lisa-ivl3-2b_nr3_122mlp2avg_2_vlorati_sr/runs/Oct02_10-12-50_bask-pg0309u05a/events.out.tfevents.1759396462.bask-pg0309u05a.3212477.1 b/lisa-ivl3-2b_nr3_122mlp2avg_2_vlorati_sr/runs/Oct02_10-12-50_bask-pg0309u05a/events.out.tfevents.1759396462.bask-pg0309u05a.3212477.1 new file mode 100644 index 0000000000000000000000000000000000000000..1fe95ba6226be5545c1adfe35b809b9c8b52b08a --- /dev/null +++ b/lisa-ivl3-2b_nr3_122mlp2avg_2_vlorati_sr/runs/Oct02_10-12-50_bask-pg0309u05a/events.out.tfevents.1759396462.bask-pg0309u05a.3212477.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cce66829896f23b43d513cc80a87bcda67bce69a7fa05967f8b8f0e2a4de5ca7 +size 9156 diff --git a/lisa-ivl3-2b_nr3_122mlp2avg_2_vlorati_sr/runs/Oct02_10-16-47_bask-pg0309u05a/events.out.tfevents.1759396694.bask-pg0309u05a.3218674.1 b/lisa-ivl3-2b_nr3_122mlp2avg_2_vlorati_sr/runs/Oct02_10-16-47_bask-pg0309u05a/events.out.tfevents.1759396694.bask-pg0309u05a.3218674.1 new file mode 100644 index 0000000000000000000000000000000000000000..b44c80f1ee97d04d36e216befde4d3b94bc93d88 --- /dev/null +++ b/lisa-ivl3-2b_nr3_122mlp2avg_2_vlorati_sr/runs/Oct02_10-16-47_bask-pg0309u05a/events.out.tfevents.1759396694.bask-pg0309u05a.3218674.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5e799d007ac1e7ef672229b885c2a68c959977cf113feca3da2bb57484cfba9 +size 116422 diff --git a/lisa-ivl3-2b_nr3_122mlp2avg_2_vlorati_sr/runs/Oct02_10-16-47_bask-pg0309u05a/events.out.tfevents.1759420758.bask-pg0309u05a.3218674.2 b/lisa-ivl3-2b_nr3_122mlp2avg_2_vlorati_sr/runs/Oct02_10-16-47_bask-pg0309u05a/events.out.tfevents.1759420758.bask-pg0309u05a.3218674.2 new file mode 100644 index 0000000000000000000000000000000000000000..1537200f1c9f4829fa73f7f7c1668588190fde1b --- /dev/null +++ b/lisa-ivl3-2b_nr3_122mlp2avg_2_vlorati_sr/runs/Oct02_10-16-47_bask-pg0309u05a/events.out.tfevents.1759420758.bask-pg0309u05a.3218674.2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49e67ea172113c132e7635a74fecbd00cde30a2fee755d0732b48710cdfa77e2 +size 1402 diff --git a/lisa-ivl3-2b_nr3_122mlp_2_vlorati_sr/ckpt_model/config.json b/lisa-ivl3-2b_nr3_122mlp_2_vlorati_sr/ckpt_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1aaa0efe9e975d906488d234e8f6ec309c18056c --- /dev/null +++ b/lisa-ivl3-2b_nr3_122mlp_2_vlorati_sr/ckpt_model/config.json @@ -0,0 +1,143 @@ +{ + "architectures": [ + "InternVL3Self" + ], + "auto_map": { + "AutoConfig": "configuration_internvl_chat.InternVLChatConfig", + "AutoModel": "modeling_internvl_chat.InternVLChatModel", + "AutoModelForCausalLM": "modeling_internvl_chat.InternVLChatModel" + }, + "downsample_ratio": 0.5, + "dtype": "bfloat16", + "dynamic_image_size": true, + "eos_token_id": 151645, + "force_image_size": 448, + "hidden_size": 1536, + "image_fold": null, + "llm_config": { + "_attn_implementation_autoset": true, + "_name_or_path": "./pretrained/Qwen2.5-32B-Instruct", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 151643, + "dtype": "bfloat16", + "eos_token_id": 151643, + "hidden_act": "silu", + "hidden_size": 1536, + "initializer_range": 0.02, + "intermediate_size": 8960, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 70, + "model_type": "qwen2", + "moe_config": null, + "num_attention_heads": 12, + "num_hidden_layers": 28, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 2.0, + "rope_type": "dynamic", + "type": "dynamic" + }, + "rope_theta": 1000000.0, + "sliding_window": null, + "use_bfloat16": true, + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 151676 + }, + "max_dynamic_patch": 12, + "min_dynamic_patch": 1, + "model_type": "internvl_chat", + "output_attentions": false, + "pad2square": false, + "pad_token_id": 151643, + "ps_version": "v2", + "select_layer": -1, + "system_message": null, + "template": "internvl2_5", + "tie_word_embeddings": false, + "transformers_version": null, + "use_backbone_lora": 0, + "use_llm_lora": 0, + "use_thumbnail": true, + "vision_config": { + "_attn_implementation_autoset": true, + "_name_or_path": "OpenGVLab/InternViT-6B-448px-V1-5", + "architectures": [ + "InternVisionModel" + ], + "attention_dropout": 0.0, + "auto_map": { + "AutoConfig": "configuration_intern_vit.InternVisionConfig", + "AutoModel": "modeling_intern_vit.InternVisionModel" + }, + "capacity_factor": 1.2, + "drop_path_rate": 0.1, + "dropout": 0.0, + "dtype": "bfloat16", + "eval_capacity_factor": 1.4, + "hidden_act": "gelu", + "hidden_size": 1024, + "image_size": 448, + "initializer_factor": 0.1, + "initializer_range": 1e-10, + "intermediate_size": 4096, + "laux_allreduce": "all_nodes", + "layer_norm_eps": 1e-06, + "model_type": "intern_vit_6b", + "moe_coeff_ratio": 0.5, + "moe_intermediate_size": 768, + "moe_output_scale": 4.0, + "noisy_gate_policy": "RSample_before", + "norm_type": "layer_norm", + "num_attention_heads": 16, + "num_channels": 3, + "num_experts": 8, + "num_hidden_layers": 24, + "num_routed_experts": 4, + "num_shared_experts": 4, + "patch_size": 14, + "qk_normalization": false, + "qkv_bias": true, + "shared_expert_intermediate_size": 3072, + "use_bfloat16": true, + "use_flash_attn": true, + "use_moe": false, + "use_residual": true, + "use_rts": false, + "use_weighted_residual": false + } +} diff --git a/lisa-ivl3-2b_nr3_122mlp_2_vlorati_sr/ckpt_model/model.safetensors b/lisa-ivl3-2b_nr3_122mlp_2_vlorati_sr/ckpt_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f59bf7cdd0423d56ad31b7770d5918b483e4ad18 --- /dev/null +++ b/lisa-ivl3-2b_nr3_122mlp_2_vlorati_sr/ckpt_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:492cf578d3c30745a3f9dbb88f4b5d0d917309b941744ff9399e292c94717268 +size 4244119544 diff --git a/lisa-ivl3-2b_nr3_122mlp_2_vlorati_sr/ckpt_model/training_args.bin b/lisa-ivl3-2b_nr3_122mlp_2_vlorati_sr/ckpt_model/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..ef5a40f8e236dc16988e945f070a0f0323bd7765 --- /dev/null +++ b/lisa-ivl3-2b_nr3_122mlp_2_vlorati_sr/ckpt_model/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26305ab0b06527618c8e23f777751252e0f8a60995ead44791b95e7acade99ae +size 7416 diff --git a/lisa-ivl3-2b_nr3_122mlp_2_vlorati_sr/evaluation_metrics.json b/lisa-ivl3-2b_nr3_122mlp_2_vlorati_sr/evaluation_metrics.json new file mode 100644 index 0000000000000000000000000000000000000000..11a233e7812f345beb34efe57cd6748ca9ff573d --- /dev/null +++ b/lisa-ivl3-2b_nr3_122mlp_2_vlorati_sr/evaluation_metrics.json @@ -0,0 +1,116 @@ +[ + { + "val_dataset": "ReasonSeg|val", + "epoch": 1.0, + "eval_giou": 0.563491702079773, + "eval_ciou": 0.5905183553695679 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 2.0, + "eval_giou": 0.5683499574661255, + "eval_ciou": 0.6703479290008545 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 3.0, + "eval_giou": 0.5942748188972473, + "eval_ciou": 0.6708864569664001 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 4.0, + "eval_giou": 0.5869226455688477, + "eval_ciou": 0.7102519273757935 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 5.0, + "eval_giou": 0.6090008616447449, + "eval_ciou": 0.6882244944572449 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 6.0, + "eval_giou": 0.5957873463630676, + "eval_ciou": 0.6477413773536682 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 7.0, + "eval_giou": 0.5987170338630676, + "eval_ciou": 0.6763898730278015 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 8.0, + "eval_giou": 0.6043369770050049, + "eval_ciou": 0.625853955745697 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 9.0, + "eval_giou": 0.6038452982902527, + "eval_ciou": 0.6852099895477295 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 10.0, + "eval_giou": 0.6061699390411377, + "eval_ciou": 0.6809495687484741 + }, + { + "val_dataset": "ReasonSeg|test", + "epoch": 10.0, + "eval_giou": 0.6005264520645142, + "eval_ciou": 0.6333128809928894 + }, + { + "val_dataset": "refcoco|unc|val", + "epoch": 10.0, + "eval_giou": 0.7889137864112854, + "eval_ciou": 0.7919589281082153 + }, + { + "val_dataset": "refcoco|unc|testA", + "epoch": 10.0, + "eval_giou": 0.8086139559745789, + "eval_ciou": 0.8127070665359497 + }, + { + "val_dataset": "refcoco|unc|testB", + "epoch": 10.0, + "eval_giou": 0.7656106352806091, + "eval_ciou": 0.7661647200584412 + }, + { + "val_dataset": "refcoco+|unc|val", + "epoch": 10.0, + "eval_giou": 0.7407153248786926, + "eval_ciou": 0.7344158291816711 + }, + { + "val_dataset": "refcoco+|unc|testA", + "epoch": 10.0, + "eval_giou": 0.7834530472755432, + "eval_ciou": 0.7851070165634155 + }, + { + "val_dataset": "refcoco+|unc|testB", + "epoch": 10.0, + "eval_giou": 0.6918015480041504, + "eval_ciou": 0.6800301671028137 + }, + { + "val_dataset": "refcocog|umd|test", + "epoch": 10.0, + "eval_giou": 0.7531152963638306, + "eval_ciou": 0.7626004219055176 + }, + { + "val_dataset": "refcocog|umd|val", + "epoch": 10.0, + "eval_giou": 0.7461735606193542, + "eval_ciou": 0.7505926489830017 + } +] \ No newline at end of file diff --git a/lisa-ivl3-2b_nr3_122mlp_2_vlorati_sr/events.out.tfevents.1759343033.bask-pg0309u05a.2072563.0 b/lisa-ivl3-2b_nr3_122mlp_2_vlorati_sr/events.out.tfevents.1759343033.bask-pg0309u05a.2072563.0 new file mode 100644 index 0000000000000000000000000000000000000000..3f95897ec0493acf5de845eafa1d6881c25ed6bc --- /dev/null +++ b/lisa-ivl3-2b_nr3_122mlp_2_vlorati_sr/events.out.tfevents.1759343033.bask-pg0309u05a.2072563.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5202ebc8f5b56c2c6ddd7cfc2f396f54fadb89c0dcd8b8a555584167b8e644f7 +size 212352 diff --git a/lisa-ivl3-2b_nr3_122mlp_2_vlorati_sr/runs/Oct01_19-23-50_bask-pg0309u05a/events.out.tfevents.1759343118.bask-pg0309u05a.2072563.1 b/lisa-ivl3-2b_nr3_122mlp_2_vlorati_sr/runs/Oct01_19-23-50_bask-pg0309u05a/events.out.tfevents.1759343118.bask-pg0309u05a.2072563.1 new file mode 100644 index 0000000000000000000000000000000000000000..a851c01aec920ee0927c2a31374f6afb00324721 --- /dev/null +++ b/lisa-ivl3-2b_nr3_122mlp_2_vlorati_sr/runs/Oct01_19-23-50_bask-pg0309u05a/events.out.tfevents.1759343118.bask-pg0309u05a.2072563.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:980e4d3192a98cea8fa9e53b297838521ddefc2d3e806d6555ed078180018742 +size 116414 diff --git a/lisa-ivl3-2b_nr3_122mlp_2_vlorati_sr/runs/Oct01_19-23-50_bask-pg0309u05a/events.out.tfevents.1759368721.bask-pg0309u05a.2072563.2 b/lisa-ivl3-2b_nr3_122mlp_2_vlorati_sr/runs/Oct01_19-23-50_bask-pg0309u05a/events.out.tfevents.1759368721.bask-pg0309u05a.2072563.2 new file mode 100644 index 0000000000000000000000000000000000000000..59f735cebf2f9a9749c87dfa035c6a750ee6047f --- /dev/null +++ b/lisa-ivl3-2b_nr3_122mlp_2_vlorati_sr/runs/Oct01_19-23-50_bask-pg0309u05a/events.out.tfevents.1759368721.bask-pg0309u05a.2072563.2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b745a4ffd94d0732ffc2cc19d0898fa77043859525b5725aca463e199344219 +size 1402 diff --git a/lisa-ivl3-2b_nr3_122mlp_vlorati_sr/ckpt_model/config.json b/lisa-ivl3-2b_nr3_122mlp_vlorati_sr/ckpt_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1aaa0efe9e975d906488d234e8f6ec309c18056c --- /dev/null +++ b/lisa-ivl3-2b_nr3_122mlp_vlorati_sr/ckpt_model/config.json @@ -0,0 +1,143 @@ +{ + "architectures": [ + "InternVL3Self" + ], + "auto_map": { + "AutoConfig": "configuration_internvl_chat.InternVLChatConfig", + "AutoModel": "modeling_internvl_chat.InternVLChatModel", + "AutoModelForCausalLM": "modeling_internvl_chat.InternVLChatModel" + }, + "downsample_ratio": 0.5, + "dtype": "bfloat16", + "dynamic_image_size": true, + "eos_token_id": 151645, + "force_image_size": 448, + "hidden_size": 1536, + "image_fold": null, + "llm_config": { + "_attn_implementation_autoset": true, + "_name_or_path": "./pretrained/Qwen2.5-32B-Instruct", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 151643, + "dtype": "bfloat16", + "eos_token_id": 151643, + "hidden_act": "silu", + "hidden_size": 1536, + "initializer_range": 0.02, + "intermediate_size": 8960, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 70, + "model_type": "qwen2", + "moe_config": null, + "num_attention_heads": 12, + "num_hidden_layers": 28, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 2.0, + "rope_type": "dynamic", + "type": "dynamic" + }, + "rope_theta": 1000000.0, + "sliding_window": null, + "use_bfloat16": true, + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 151676 + }, + "max_dynamic_patch": 12, + "min_dynamic_patch": 1, + "model_type": "internvl_chat", + "output_attentions": false, + "pad2square": false, + "pad_token_id": 151643, + "ps_version": "v2", + "select_layer": -1, + "system_message": null, + "template": "internvl2_5", + "tie_word_embeddings": false, + "transformers_version": null, + "use_backbone_lora": 0, + "use_llm_lora": 0, + "use_thumbnail": true, + "vision_config": { + "_attn_implementation_autoset": true, + "_name_or_path": "OpenGVLab/InternViT-6B-448px-V1-5", + "architectures": [ + "InternVisionModel" + ], + "attention_dropout": 0.0, + "auto_map": { + "AutoConfig": "configuration_intern_vit.InternVisionConfig", + "AutoModel": "modeling_intern_vit.InternVisionModel" + }, + "capacity_factor": 1.2, + "drop_path_rate": 0.1, + "dropout": 0.0, + "dtype": "bfloat16", + "eval_capacity_factor": 1.4, + "hidden_act": "gelu", + "hidden_size": 1024, + "image_size": 448, + "initializer_factor": 0.1, + "initializer_range": 1e-10, + "intermediate_size": 4096, + "laux_allreduce": "all_nodes", + "layer_norm_eps": 1e-06, + "model_type": "intern_vit_6b", + "moe_coeff_ratio": 0.5, + "moe_intermediate_size": 768, + "moe_output_scale": 4.0, + "noisy_gate_policy": "RSample_before", + "norm_type": "layer_norm", + "num_attention_heads": 16, + "num_channels": 3, + "num_experts": 8, + "num_hidden_layers": 24, + "num_routed_experts": 4, + "num_shared_experts": 4, + "patch_size": 14, + "qk_normalization": false, + "qkv_bias": true, + "shared_expert_intermediate_size": 3072, + "use_bfloat16": true, + "use_flash_attn": true, + "use_moe": false, + "use_residual": true, + "use_rts": false, + "use_weighted_residual": false + } +} diff --git a/lisa-ivl3-2b_nr3_122mlp_vlorati_sr/ckpt_model/model.safetensors b/lisa-ivl3-2b_nr3_122mlp_vlorati_sr/ckpt_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2bbffab0e1348861b9b3f56d0bd8c4fdc57b98e2 --- /dev/null +++ b/lisa-ivl3-2b_nr3_122mlp_vlorati_sr/ckpt_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de2a69cf7881c5c5c273fd6b156f9206854906c0178e65a2294477350b17e911 +size 4244119544 diff --git a/lisa-ivl3-2b_nr3_122mlp_vlorati_sr/ckpt_model/training_args.bin b/lisa-ivl3-2b_nr3_122mlp_vlorati_sr/ckpt_model/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..5ec1a5e65242929ba9342e68ba070868a6100ea9 --- /dev/null +++ b/lisa-ivl3-2b_nr3_122mlp_vlorati_sr/ckpt_model/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe6a3193c510a07caeec582ba2c46008253e8ed088e3c86c5c5bc9fdd95a9814 +size 7352 diff --git a/lisa-ivl3-2b_nr3_122mlp_vlorati_sr/evaluation_metrics.json b/lisa-ivl3-2b_nr3_122mlp_vlorati_sr/evaluation_metrics.json new file mode 100644 index 0000000000000000000000000000000000000000..1d5f329461b4390dfd2fb79cb325653a1d6b0930 --- /dev/null +++ b/lisa-ivl3-2b_nr3_122mlp_vlorati_sr/evaluation_metrics.json @@ -0,0 +1,116 @@ +[ + { + "val_dataset": "ReasonSeg|val", + "epoch": 1.0, + "eval_giou": 0.5593529343605042, + "eval_ciou": 0.6489163637161255 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 2.0, + "eval_giou": 0.5875852108001709, + "eval_ciou": 0.5963523983955383 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 3.0, + "eval_giou": 0.579784095287323, + "eval_ciou": 0.5806050896644592 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 4.0, + "eval_giou": 0.5939780473709106, + "eval_ciou": 0.5917631387710571 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 5.0, + "eval_giou": 0.6096924543380737, + "eval_ciou": 0.6828257441520691 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 6.0, + "eval_giou": 0.622477650642395, + "eval_ciou": 0.6112227439880371 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 7.0, + "eval_giou": 0.6237930059432983, + "eval_ciou": 0.7058490514755249 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 8.0, + "eval_giou": 0.6221110224723816, + "eval_ciou": 0.6821957230567932 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 9.0, + "eval_giou": 0.6252615451812744, + "eval_ciou": 0.6666589975357056 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 10.0, + "eval_giou": 0.623760998249054, + "eval_ciou": 0.6808136105537415 + }, + { + "val_dataset": "ReasonSeg|test", + "epoch": 10.0, + "eval_giou": 0.6045253276824951, + "eval_ciou": 0.6384826302528381 + }, + { + "val_dataset": "refcoco|unc|val", + "epoch": 10.0, + "eval_giou": 0.7866906523704529, + "eval_ciou": 0.7909759283065796 + }, + { + "val_dataset": "refcoco|unc|testA", + "epoch": 10.0, + "eval_giou": 0.8086105585098267, + "eval_ciou": 0.8127821683883667 + }, + { + "val_dataset": "refcoco|unc|testB", + "epoch": 10.0, + "eval_giou": 0.7581214904785156, + "eval_ciou": 0.7582806348800659 + }, + { + "val_dataset": "refcoco+|unc|val", + "epoch": 10.0, + "eval_giou": 0.7364990711212158, + "eval_ciou": 0.7310473918914795 + }, + { + "val_dataset": "refcoco+|unc|testA", + "epoch": 10.0, + "eval_giou": 0.778446614742279, + "eval_ciou": 0.778328537940979 + }, + { + "val_dataset": "refcoco+|unc|testB", + "epoch": 10.0, + "eval_giou": 0.6889506578445435, + "eval_ciou": 0.6734749674797058 + }, + { + "val_dataset": "refcocog|umd|test", + "epoch": 10.0, + "eval_giou": 0.7515197992324829, + "eval_ciou": 0.7625178694725037 + }, + { + "val_dataset": "refcocog|umd|val", + "epoch": 10.0, + "eval_giou": 0.7528123259544373, + "eval_ciou": 0.7605093121528625 + } +] \ No newline at end of file diff --git a/lisa-ivl3-2b_nr3_122mlp_vlorati_sr/events.out.tfevents.1759309667.bask-pg0308u29a.2512943.0 b/lisa-ivl3-2b_nr3_122mlp_vlorati_sr/events.out.tfevents.1759309667.bask-pg0308u29a.2512943.0 new file mode 100644 index 0000000000000000000000000000000000000000..4272fe9c977c6249952a94799e1e51e9c392d991 --- /dev/null +++ b/lisa-ivl3-2b_nr3_122mlp_vlorati_sr/events.out.tfevents.1759309667.bask-pg0308u29a.2512943.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9b5a615a56fc73f0ce49914325bcc25f60f8435303caaf405f03d93b1c05d42 +size 884 diff --git a/lisa-ivl3-2b_nr3_122mlp_vlorati_sr/events.out.tfevents.1759311052.bask-pg0308u29a.2558967.0 b/lisa-ivl3-2b_nr3_122mlp_vlorati_sr/events.out.tfevents.1759311052.bask-pg0308u29a.2558967.0 new file mode 100644 index 0000000000000000000000000000000000000000..5cabf7ad734f74cbfa3ae8406b04c07234695037 --- /dev/null +++ b/lisa-ivl3-2b_nr3_122mlp_vlorati_sr/events.out.tfevents.1759311052.bask-pg0308u29a.2558967.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b383348918a04554efa78d73d4c19a4e49d5d74bd4c7a97408c129911b9c28e9 +size 88 diff --git a/lisa-ivl3-2b_nr3_122mlp_vlorati_sr/events.out.tfevents.1759311338.bask-pg0308u29a.2568688.0 b/lisa-ivl3-2b_nr3_122mlp_vlorati_sr/events.out.tfevents.1759311338.bask-pg0308u29a.2568688.0 new file mode 100644 index 0000000000000000000000000000000000000000..cd0637768a714615f3b44dac8127b6bfe1955677 --- /dev/null +++ b/lisa-ivl3-2b_nr3_122mlp_vlorati_sr/events.out.tfevents.1759311338.bask-pg0308u29a.2568688.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aeadf964b1cf8acd28f8c3ac4b8c2f11e26e9a9fd0798c576525e83ae38c8612 +size 20254 diff --git a/lisa-ivl3-2b_nr3_122mlp_vlorati_sr/events.out.tfevents.1759316943.bask-pg0309u17a.1622785.0 b/lisa-ivl3-2b_nr3_122mlp_vlorati_sr/events.out.tfevents.1759316943.bask-pg0309u17a.1622785.0 new file mode 100644 index 0000000000000000000000000000000000000000..04ffbba01b76411c2e9f4e92c396c08c6d460655 --- /dev/null +++ b/lisa-ivl3-2b_nr3_122mlp_vlorati_sr/events.out.tfevents.1759316943.bask-pg0309u17a.1622785.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4aad018fc7191afdabcb5f89f2a789b05966655d3234de8d135d44731cee953 +size 212352 diff --git a/lisa-ivl3-2b_nr3_122mlp_vlorati_sr/runs/Oct01_10-07-44_bask-pg0308u29a/events.out.tfevents.1759309748.bask-pg0308u29a.2512943.1 b/lisa-ivl3-2b_nr3_122mlp_vlorati_sr/runs/Oct01_10-07-44_bask-pg0308u29a/events.out.tfevents.1759309748.bask-pg0308u29a.2512943.1 new file mode 100644 index 0000000000000000000000000000000000000000..206d40506b5557bf2a637bbb3a12390f5cd919e0 --- /dev/null +++ b/lisa-ivl3-2b_nr3_122mlp_vlorati_sr/runs/Oct01_10-07-44_bask-pg0308u29a/events.out.tfevents.1759309748.bask-pg0308u29a.2512943.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10a7634a7d2b37972750d63777604fe6549fb4e61f525ce5d57df6315c7c29f8 +size 9561 diff --git a/lisa-ivl3-2b_nr3_122mlp_vlorati_sr/runs/Oct01_10-30-49_bask-pg0308u29a/events.out.tfevents.1759311133.bask-pg0308u29a.2558967.1 b/lisa-ivl3-2b_nr3_122mlp_vlorati_sr/runs/Oct01_10-30-49_bask-pg0308u29a/events.out.tfevents.1759311133.bask-pg0308u29a.2558967.1 new file mode 100644 index 0000000000000000000000000000000000000000..c5d99e7d53070522ceb7b70b7c210ecf138ba593 --- /dev/null +++ b/lisa-ivl3-2b_nr3_122mlp_vlorati_sr/runs/Oct01_10-30-49_bask-pg0308u29a/events.out.tfevents.1759311133.bask-pg0308u29a.2558967.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:646ca58be0e161b13721e477055816fc2231b9aac2f3a5fc8e5d220c59a6753d +size 9209 diff --git a/lisa-ivl3-2b_nr3_122mlp_vlorati_sr/runs/Oct01_10-35-35_bask-pg0308u29a/events.out.tfevents.1759311410.bask-pg0308u29a.2568688.1 b/lisa-ivl3-2b_nr3_122mlp_vlorati_sr/runs/Oct01_10-35-35_bask-pg0308u29a/events.out.tfevents.1759311410.bask-pg0308u29a.2568688.1 new file mode 100644 index 0000000000000000000000000000000000000000..354e0bdea6f3ed4f52a3103d3165b856c02ad575 --- /dev/null +++ b/lisa-ivl3-2b_nr3_122mlp_vlorati_sr/runs/Oct01_10-35-35_bask-pg0308u29a/events.out.tfevents.1759311410.bask-pg0308u29a.2568688.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2728361706defd403a352b3f69b1e34f5ba876d470a1c490ef00ce9a53f61b5 +size 19649 diff --git a/lisa-ivl3-2b_nr3_122mlp_vlorati_sr/runs/Oct01_12-09-00_bask-pg0309u17a/events.out.tfevents.1759317007.bask-pg0309u17a.1622785.1 b/lisa-ivl3-2b_nr3_122mlp_vlorati_sr/runs/Oct01_12-09-00_bask-pg0309u17a/events.out.tfevents.1759317007.bask-pg0309u17a.1622785.1 new file mode 100644 index 0000000000000000000000000000000000000000..e2792aa15949670c3e41f8da15a09f944130ffbc --- /dev/null +++ b/lisa-ivl3-2b_nr3_122mlp_vlorati_sr/runs/Oct01_12-09-00_bask-pg0309u17a/events.out.tfevents.1759317007.bask-pg0309u17a.1622785.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac9a5fa6859afcab512fa603e40bce7304bcf3bc25b044c2c7a45c0751acf156 +size 116413 diff --git a/lisa-ivl3-2b_nr3_122mlp_vlorati_sr/runs/Oct01_12-09-00_bask-pg0309u17a/events.out.tfevents.1759365063.bask-pg0309u17a.1622785.2 b/lisa-ivl3-2b_nr3_122mlp_vlorati_sr/runs/Oct01_12-09-00_bask-pg0309u17a/events.out.tfevents.1759365063.bask-pg0309u17a.1622785.2 new file mode 100644 index 0000000000000000000000000000000000000000..441c493a65d25d246cbae8f9cf1f4e9a175a40c9 --- /dev/null +++ b/lisa-ivl3-2b_nr3_122mlp_vlorati_sr/runs/Oct01_12-09-00_bask-pg0309u17a/events.out.tfevents.1759365063.bask-pg0309u17a.1622785.2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9f56740e7d9431dfa4e58d52ecc3fa91f1883cbfe8b119bc8953fa4d9986126 +size 1402 diff --git a/lisa-ivl3-2b_nr3_123_2_vlorati_sr/ckpt_model/config.json b/lisa-ivl3-2b_nr3_123_2_vlorati_sr/ckpt_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1aaa0efe9e975d906488d234e8f6ec309c18056c --- /dev/null +++ b/lisa-ivl3-2b_nr3_123_2_vlorati_sr/ckpt_model/config.json @@ -0,0 +1,143 @@ +{ + "architectures": [ + "InternVL3Self" + ], + "auto_map": { + "AutoConfig": "configuration_internvl_chat.InternVLChatConfig", + "AutoModel": "modeling_internvl_chat.InternVLChatModel", + "AutoModelForCausalLM": "modeling_internvl_chat.InternVLChatModel" + }, + "downsample_ratio": 0.5, + "dtype": "bfloat16", + "dynamic_image_size": true, + "eos_token_id": 151645, + "force_image_size": 448, + "hidden_size": 1536, + "image_fold": null, + "llm_config": { + "_attn_implementation_autoset": true, + "_name_or_path": "./pretrained/Qwen2.5-32B-Instruct", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 151643, + "dtype": "bfloat16", + "eos_token_id": 151643, + "hidden_act": "silu", + "hidden_size": 1536, + "initializer_range": 0.02, + "intermediate_size": 8960, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 70, + "model_type": "qwen2", + "moe_config": null, + "num_attention_heads": 12, + "num_hidden_layers": 28, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 2.0, + "rope_type": "dynamic", + "type": "dynamic" + }, + "rope_theta": 1000000.0, + "sliding_window": null, + "use_bfloat16": true, + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 151676 + }, + "max_dynamic_patch": 12, + "min_dynamic_patch": 1, + "model_type": "internvl_chat", + "output_attentions": false, + "pad2square": false, + "pad_token_id": 151643, + "ps_version": "v2", + "select_layer": -1, + "system_message": null, + "template": "internvl2_5", + "tie_word_embeddings": false, + "transformers_version": null, + "use_backbone_lora": 0, + "use_llm_lora": 0, + "use_thumbnail": true, + "vision_config": { + "_attn_implementation_autoset": true, + "_name_or_path": "OpenGVLab/InternViT-6B-448px-V1-5", + "architectures": [ + "InternVisionModel" + ], + "attention_dropout": 0.0, + "auto_map": { + "AutoConfig": "configuration_intern_vit.InternVisionConfig", + "AutoModel": "modeling_intern_vit.InternVisionModel" + }, + "capacity_factor": 1.2, + "drop_path_rate": 0.1, + "dropout": 0.0, + "dtype": "bfloat16", + "eval_capacity_factor": 1.4, + "hidden_act": "gelu", + "hidden_size": 1024, + "image_size": 448, + "initializer_factor": 0.1, + "initializer_range": 1e-10, + "intermediate_size": 4096, + "laux_allreduce": "all_nodes", + "layer_norm_eps": 1e-06, + "model_type": "intern_vit_6b", + "moe_coeff_ratio": 0.5, + "moe_intermediate_size": 768, + "moe_output_scale": 4.0, + "noisy_gate_policy": "RSample_before", + "norm_type": "layer_norm", + "num_attention_heads": 16, + "num_channels": 3, + "num_experts": 8, + "num_hidden_layers": 24, + "num_routed_experts": 4, + "num_shared_experts": 4, + "patch_size": 14, + "qk_normalization": false, + "qkv_bias": true, + "shared_expert_intermediate_size": 3072, + "use_bfloat16": true, + "use_flash_attn": true, + "use_moe": false, + "use_residual": true, + "use_rts": false, + "use_weighted_residual": false + } +} diff --git a/lisa-ivl3-2b_nr3_123_2_vlorati_sr/ckpt_model/model.safetensors b/lisa-ivl3-2b_nr3_123_2_vlorati_sr/ckpt_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..913ffc7c8723e329542f0c005823725ee7af37d8 --- /dev/null +++ b/lisa-ivl3-2b_nr3_123_2_vlorati_sr/ckpt_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d94b9f7a6a95fe5e4bc860e87c3284f8b00b068f53a2293ee08e639974075d5 +size 4258284552 diff --git a/lisa-ivl3-2b_nr3_123_2_vlorati_sr/ckpt_model/training_args.bin b/lisa-ivl3-2b_nr3_123_2_vlorati_sr/ckpt_model/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..4bafce6a418eaeaaf1ebb7f97a8bd021fe7b8700 --- /dev/null +++ b/lisa-ivl3-2b_nr3_123_2_vlorati_sr/ckpt_model/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0427c444a02770311acf949e89a91484cd677db33e4719c6b4867b3a5251e1fc +size 7352 diff --git a/lisa-ivl3-2b_nr3_123_2_vlorati_sr/evaluation_metrics.json b/lisa-ivl3-2b_nr3_123_2_vlorati_sr/evaluation_metrics.json new file mode 100644 index 0000000000000000000000000000000000000000..28baabec5704dd2f933dcc79c2c292f415d6b66e --- /dev/null +++ b/lisa-ivl3-2b_nr3_123_2_vlorati_sr/evaluation_metrics.json @@ -0,0 +1,116 @@ +[ + { + "val_dataset": "ReasonSeg|val", + "epoch": 1.0, + "eval_giou": 0.5462727546691895, + "eval_ciou": 0.6024931073188782 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 2.0, + "eval_giou": 0.5598053932189941, + "eval_ciou": 0.5911931991577148 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 3.0, + "eval_giou": 0.5736863017082214, + "eval_ciou": 0.5749748945236206 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 4.0, + "eval_giou": 0.590449869632721, + "eval_ciou": 0.6552375555038452 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 5.0, + "eval_giou": 0.5868483781814575, + "eval_ciou": 0.6483632326126099 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 6.0, + "eval_giou": 0.6017407178878784, + "eval_ciou": 0.6548077464103699 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 7.0, + "eval_giou": 0.5960966944694519, + "eval_ciou": 0.64167720079422 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 8.0, + "eval_giou": 0.5994389653205872, + "eval_ciou": 0.6598002910614014 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 9.0, + "eval_giou": 0.5976763963699341, + "eval_ciou": 0.6564356684684753 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 10.0, + "eval_giou": 0.599377453327179, + "eval_ciou": 0.6396908760070801 + }, + { + "val_dataset": "ReasonSeg|test", + "epoch": 10.0, + "eval_giou": 0.6031112670898438, + "eval_ciou": 0.6524807810783386 + }, + { + "val_dataset": "refcoco|unc|val", + "epoch": 10.0, + "eval_giou": 0.7835877537727356, + "eval_ciou": 0.7882537841796875 + }, + { + "val_dataset": "refcoco|unc|testA", + "epoch": 10.0, + "eval_giou": 0.8063337802886963, + "eval_ciou": 0.8119136691093445 + }, + { + "val_dataset": "refcoco|unc|testB", + "epoch": 10.0, + "eval_giou": 0.756846010684967, + "eval_ciou": 0.7557718753814697 + }, + { + "val_dataset": "refcoco+|unc|val", + "epoch": 10.0, + "eval_giou": 0.7340245842933655, + "eval_ciou": 0.7261947393417358 + }, + { + "val_dataset": "refcoco+|unc|testA", + "epoch": 10.0, + "eval_giou": 0.7734711170196533, + "eval_ciou": 0.7719330787658691 + }, + { + "val_dataset": "refcoco+|unc|testB", + "epoch": 10.0, + "eval_giou": 0.6849915981292725, + "eval_ciou": 0.6721301078796387 + }, + { + "val_dataset": "refcocog|umd|test", + "epoch": 10.0, + "eval_giou": 0.7432234883308411, + "eval_ciou": 0.751539409160614 + }, + { + "val_dataset": "refcocog|umd|val", + "epoch": 10.0, + "eval_giou": 0.7450332641601562, + "eval_ciou": 0.7532289028167725 + } +] \ No newline at end of file diff --git a/lisa-ivl3-2b_nr3_123_2_vlorati_sr/events.out.tfevents.1759398567.bask-pg0309u17a.2930296.0 b/lisa-ivl3-2b_nr3_123_2_vlorati_sr/events.out.tfevents.1759398567.bask-pg0309u17a.2930296.0 new file mode 100644 index 0000000000000000000000000000000000000000..5d733fe19df00d344e7f7fe23898e4d1848c4482 --- /dev/null +++ b/lisa-ivl3-2b_nr3_123_2_vlorati_sr/events.out.tfevents.1759398567.bask-pg0309u17a.2930296.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:959438b0da222c828e173ee28d34c4b1872e499da3267c14351d429be2174923 +size 212352 diff --git a/lisa-ivl3-2b_nr3_123_2_vlorati_sr/runs/Oct02_10-49-24_bask-pg0309u17a/events.out.tfevents.1759398642.bask-pg0309u17a.2930296.1 b/lisa-ivl3-2b_nr3_123_2_vlorati_sr/runs/Oct02_10-49-24_bask-pg0309u17a/events.out.tfevents.1759398642.bask-pg0309u17a.2930296.1 new file mode 100644 index 0000000000000000000000000000000000000000..994181ab05e9c16077ed1334f099cbd30e6ed569 --- /dev/null +++ b/lisa-ivl3-2b_nr3_123_2_vlorati_sr/runs/Oct02_10-49-24_bask-pg0309u17a/events.out.tfevents.1759398642.bask-pg0309u17a.2930296.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4e4cf74f034f494f8f7885b0d3c4cfc23a7d48d80c337bfcacd187e44556369 +size 116411 diff --git a/lisa-ivl3-2b_nr3_123_2_vlorati_sr/runs/Oct02_10-49-24_bask-pg0309u17a/events.out.tfevents.1759447198.bask-pg0309u17a.2930296.2 b/lisa-ivl3-2b_nr3_123_2_vlorati_sr/runs/Oct02_10-49-24_bask-pg0309u17a/events.out.tfevents.1759447198.bask-pg0309u17a.2930296.2 new file mode 100644 index 0000000000000000000000000000000000000000..984d8c25eec9cf64667fe6b29cdeac099074a9fc --- /dev/null +++ b/lisa-ivl3-2b_nr3_123_2_vlorati_sr/runs/Oct02_10-49-24_bask-pg0309u17a/events.out.tfevents.1759447198.bask-pg0309u17a.2930296.2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:663e565c5832ada4132abf443a2072468756035039585d4024083831ea0f81c4 +size 1402 diff --git a/lisa-ivl3-2b_nr3_123_vlorati_sr/ckpt_model/config.json b/lisa-ivl3-2b_nr3_123_vlorati_sr/ckpt_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1aaa0efe9e975d906488d234e8f6ec309c18056c --- /dev/null +++ b/lisa-ivl3-2b_nr3_123_vlorati_sr/ckpt_model/config.json @@ -0,0 +1,143 @@ +{ + "architectures": [ + "InternVL3Self" + ], + "auto_map": { + "AutoConfig": "configuration_internvl_chat.InternVLChatConfig", + "AutoModel": "modeling_internvl_chat.InternVLChatModel", + "AutoModelForCausalLM": "modeling_internvl_chat.InternVLChatModel" + }, + "downsample_ratio": 0.5, + "dtype": "bfloat16", + "dynamic_image_size": true, + "eos_token_id": 151645, + "force_image_size": 448, + "hidden_size": 1536, + "image_fold": null, + "llm_config": { + "_attn_implementation_autoset": true, + "_name_or_path": "./pretrained/Qwen2.5-32B-Instruct", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 151643, + "dtype": "bfloat16", + "eos_token_id": 151643, + "hidden_act": "silu", + "hidden_size": 1536, + "initializer_range": 0.02, + "intermediate_size": 8960, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 70, + "model_type": "qwen2", + "moe_config": null, + "num_attention_heads": 12, + "num_hidden_layers": 28, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 2.0, + "rope_type": "dynamic", + "type": "dynamic" + }, + "rope_theta": 1000000.0, + "sliding_window": null, + "use_bfloat16": true, + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 151676 + }, + "max_dynamic_patch": 12, + "min_dynamic_patch": 1, + "model_type": "internvl_chat", + "output_attentions": false, + "pad2square": false, + "pad_token_id": 151643, + "ps_version": "v2", + "select_layer": -1, + "system_message": null, + "template": "internvl2_5", + "tie_word_embeddings": false, + "transformers_version": null, + "use_backbone_lora": 0, + "use_llm_lora": 0, + "use_thumbnail": true, + "vision_config": { + "_attn_implementation_autoset": true, + "_name_or_path": "OpenGVLab/InternViT-6B-448px-V1-5", + "architectures": [ + "InternVisionModel" + ], + "attention_dropout": 0.0, + "auto_map": { + "AutoConfig": "configuration_intern_vit.InternVisionConfig", + "AutoModel": "modeling_intern_vit.InternVisionModel" + }, + "capacity_factor": 1.2, + "drop_path_rate": 0.1, + "dropout": 0.0, + "dtype": "bfloat16", + "eval_capacity_factor": 1.4, + "hidden_act": "gelu", + "hidden_size": 1024, + "image_size": 448, + "initializer_factor": 0.1, + "initializer_range": 1e-10, + "intermediate_size": 4096, + "laux_allreduce": "all_nodes", + "layer_norm_eps": 1e-06, + "model_type": "intern_vit_6b", + "moe_coeff_ratio": 0.5, + "moe_intermediate_size": 768, + "moe_output_scale": 4.0, + "noisy_gate_policy": "RSample_before", + "norm_type": "layer_norm", + "num_attention_heads": 16, + "num_channels": 3, + "num_experts": 8, + "num_hidden_layers": 24, + "num_routed_experts": 4, + "num_shared_experts": 4, + "patch_size": 14, + "qk_normalization": false, + "qkv_bias": true, + "shared_expert_intermediate_size": 3072, + "use_bfloat16": true, + "use_flash_attn": true, + "use_moe": false, + "use_residual": true, + "use_rts": false, + "use_weighted_residual": false + } +} diff --git a/lisa-ivl3-2b_nr3_123_vlorati_sr/ckpt_model/model.safetensors b/lisa-ivl3-2b_nr3_123_vlorati_sr/ckpt_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e1c51fdbece63ac29cef3f17e3d284fdea321a3e --- /dev/null +++ b/lisa-ivl3-2b_nr3_123_vlorati_sr/ckpt_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9efe72a289ebfa5ce865dc37c1c944cceabc9401137aad27128313208094511 +size 4258284552 diff --git a/lisa-ivl3-2b_nr3_123_vlorati_sr/ckpt_model/training_args.bin b/lisa-ivl3-2b_nr3_123_vlorati_sr/ckpt_model/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..001ac06a344a560a778d8419a9c5cd47aa3429c8 --- /dev/null +++ b/lisa-ivl3-2b_nr3_123_vlorati_sr/ckpt_model/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:edbdffc043d0a213101e97a71d4dc666b880ac29229e84b095d97b8627d1adf4 +size 7352 diff --git a/lisa-ivl3-2b_nr3_123_vlorati_sr/evaluation_metrics.json b/lisa-ivl3-2b_nr3_123_vlorati_sr/evaluation_metrics.json new file mode 100644 index 0000000000000000000000000000000000000000..690b6cceac610a709da95ece939e115e66caa34d --- /dev/null +++ b/lisa-ivl3-2b_nr3_123_vlorati_sr/evaluation_metrics.json @@ -0,0 +1,116 @@ +[ + { + "val_dataset": "ReasonSeg|val", + "epoch": 1.0, + "eval_giou": 0.5145893692970276, + "eval_ciou": 0.6046803593635559 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 2.0, + "eval_giou": 0.5497562289237976, + "eval_ciou": 0.608154296875 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 3.0, + "eval_giou": 0.5778596997261047, + "eval_ciou": 0.6512641310691833 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 4.0, + "eval_giou": 0.572151780128479, + "eval_ciou": 0.6383295655250549 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 5.0, + "eval_giou": 0.5736947059631348, + "eval_ciou": 0.6471269726753235 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 6.0, + "eval_giou": 0.5838074684143066, + "eval_ciou": 0.5825278759002686 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 7.0, + "eval_giou": 0.5842145681381226, + "eval_ciou": 0.6610010862350464 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 8.0, + "eval_giou": 0.5808438062667847, + "eval_ciou": 0.6311984062194824 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 9.0, + "eval_giou": 0.5956627726554871, + "eval_ciou": 0.6158565282821655 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 10.0, + "eval_giou": 0.5927153825759888, + "eval_ciou": 0.6266610622406006 + }, + { + "val_dataset": "ReasonSeg|test", + "epoch": 10.0, + "eval_giou": 0.5969268679618835, + "eval_ciou": 0.6473346948623657 + }, + { + "val_dataset": "refcoco|unc|val", + "epoch": 10.0, + "eval_giou": 0.7825651168823242, + "eval_ciou": 0.7878521680831909 + }, + { + "val_dataset": "refcoco|unc|testA", + "epoch": 10.0, + "eval_giou": 0.8034126162528992, + "eval_ciou": 0.8102465271949768 + }, + { + "val_dataset": "refcoco|unc|testB", + "epoch": 10.0, + "eval_giou": 0.7575039863586426, + "eval_ciou": 0.7627096176147461 + }, + { + "val_dataset": "refcoco+|unc|val", + "epoch": 10.0, + "eval_giou": 0.7320328950881958, + "eval_ciou": 0.7271097302436829 + }, + { + "val_dataset": "refcoco+|unc|testA", + "epoch": 10.0, + "eval_giou": 0.7740505933761597, + "eval_ciou": 0.7754980325698853 + }, + { + "val_dataset": "refcoco+|unc|testB", + "epoch": 10.0, + "eval_giou": 0.685105562210083, + "eval_ciou": 0.6766233444213867 + }, + { + "val_dataset": "refcocog|umd|test", + "epoch": 10.0, + "eval_giou": 0.7450615167617798, + "eval_ciou": 0.7572892308235168 + }, + { + "val_dataset": "refcocog|umd|val", + "epoch": 10.0, + "eval_giou": 0.7410337328910828, + "eval_ciou": 0.7480975985527039 + } +] \ No newline at end of file diff --git a/lisa-ivl3-2b_nr3_123_vlorati_sr/events.out.tfevents.1759399266.bask-pg0308u12a.193557.0 b/lisa-ivl3-2b_nr3_123_vlorati_sr/events.out.tfevents.1759399266.bask-pg0308u12a.193557.0 new file mode 100644 index 0000000000000000000000000000000000000000..c194ecae4d657e8357a0ce16cedfa106cbcd2f2b --- /dev/null +++ b/lisa-ivl3-2b_nr3_123_vlorati_sr/events.out.tfevents.1759399266.bask-pg0308u12a.193557.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34f9dfaed09a3738a4f1eaa80a30314a8740f73b36fbd600f77d82d4d30b32e5 +size 212352 diff --git a/lisa-ivl3-2b_nr3_123_vlorati_sr/runs/Oct02_11-01-03_bask-pg0308u12a/events.out.tfevents.1759399351.bask-pg0308u12a.193557.1 b/lisa-ivl3-2b_nr3_123_vlorati_sr/runs/Oct02_11-01-03_bask-pg0308u12a/events.out.tfevents.1759399351.bask-pg0308u12a.193557.1 new file mode 100644 index 0000000000000000000000000000000000000000..4bcb2ec8ef4d8f624958be3854152959aefce2a6 --- /dev/null +++ b/lisa-ivl3-2b_nr3_123_vlorati_sr/runs/Oct02_11-01-03_bask-pg0308u12a/events.out.tfevents.1759399351.bask-pg0308u12a.193557.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3267474f15635e613284c0d0ac629271753bcfcea07f7156d2151e6af77250a3 +size 116404 diff --git a/lisa-ivl3-2b_nr3_123_vlorati_sr/runs/Oct02_11-01-03_bask-pg0308u12a/events.out.tfevents.1759425784.bask-pg0308u12a.193557.2 b/lisa-ivl3-2b_nr3_123_vlorati_sr/runs/Oct02_11-01-03_bask-pg0308u12a/events.out.tfevents.1759425784.bask-pg0308u12a.193557.2 new file mode 100644 index 0000000000000000000000000000000000000000..1f3c06ad4bdace3b010febaa367df4d41038fc9a --- /dev/null +++ b/lisa-ivl3-2b_nr3_123_vlorati_sr/runs/Oct02_11-01-03_bask-pg0308u12a/events.out.tfevents.1759425784.bask-pg0308u12a.193557.2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:645b1c9e62d00f0afda3957d7b60d84ee3c274e036485b38325c172320805673 +size 1402 diff --git a/lisa-ivl3-2b_nr3_123avg_2_vlorati_sr/ckpt_model/config.json b/lisa-ivl3-2b_nr3_123avg_2_vlorati_sr/ckpt_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1aaa0efe9e975d906488d234e8f6ec309c18056c --- /dev/null +++ b/lisa-ivl3-2b_nr3_123avg_2_vlorati_sr/ckpt_model/config.json @@ -0,0 +1,143 @@ +{ + "architectures": [ + "InternVL3Self" + ], + "auto_map": { + "AutoConfig": "configuration_internvl_chat.InternVLChatConfig", + "AutoModel": "modeling_internvl_chat.InternVLChatModel", + "AutoModelForCausalLM": "modeling_internvl_chat.InternVLChatModel" + }, + "downsample_ratio": 0.5, + "dtype": "bfloat16", + "dynamic_image_size": true, + "eos_token_id": 151645, + "force_image_size": 448, + "hidden_size": 1536, + "image_fold": null, + "llm_config": { + "_attn_implementation_autoset": true, + "_name_or_path": "./pretrained/Qwen2.5-32B-Instruct", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 151643, + "dtype": "bfloat16", + "eos_token_id": 151643, + "hidden_act": "silu", + "hidden_size": 1536, + "initializer_range": 0.02, + "intermediate_size": 8960, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 70, + "model_type": "qwen2", + "moe_config": null, + "num_attention_heads": 12, + "num_hidden_layers": 28, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 2.0, + "rope_type": "dynamic", + "type": "dynamic" + }, + "rope_theta": 1000000.0, + "sliding_window": null, + "use_bfloat16": true, + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 151676 + }, + "max_dynamic_patch": 12, + "min_dynamic_patch": 1, + "model_type": "internvl_chat", + "output_attentions": false, + "pad2square": false, + "pad_token_id": 151643, + "ps_version": "v2", + "select_layer": -1, + "system_message": null, + "template": "internvl2_5", + "tie_word_embeddings": false, + "transformers_version": null, + "use_backbone_lora": 0, + "use_llm_lora": 0, + "use_thumbnail": true, + "vision_config": { + "_attn_implementation_autoset": true, + "_name_or_path": "OpenGVLab/InternViT-6B-448px-V1-5", + "architectures": [ + "InternVisionModel" + ], + "attention_dropout": 0.0, + "auto_map": { + "AutoConfig": "configuration_intern_vit.InternVisionConfig", + "AutoModel": "modeling_intern_vit.InternVisionModel" + }, + "capacity_factor": 1.2, + "drop_path_rate": 0.1, + "dropout": 0.0, + "dtype": "bfloat16", + "eval_capacity_factor": 1.4, + "hidden_act": "gelu", + "hidden_size": 1024, + "image_size": 448, + "initializer_factor": 0.1, + "initializer_range": 1e-10, + "intermediate_size": 4096, + "laux_allreduce": "all_nodes", + "layer_norm_eps": 1e-06, + "model_type": "intern_vit_6b", + "moe_coeff_ratio": 0.5, + "moe_intermediate_size": 768, + "moe_output_scale": 4.0, + "noisy_gate_policy": "RSample_before", + "norm_type": "layer_norm", + "num_attention_heads": 16, + "num_channels": 3, + "num_experts": 8, + "num_hidden_layers": 24, + "num_routed_experts": 4, + "num_shared_experts": 4, + "patch_size": 14, + "qk_normalization": false, + "qkv_bias": true, + "shared_expert_intermediate_size": 3072, + "use_bfloat16": true, + "use_flash_attn": true, + "use_moe": false, + "use_residual": true, + "use_rts": false, + "use_weighted_residual": false + } +} diff --git a/lisa-ivl3-2b_nr3_123avg_2_vlorati_sr/ckpt_model/model.safetensors b/lisa-ivl3-2b_nr3_123avg_2_vlorati_sr/ckpt_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b535c8d2dca964a90d08abbccf6e716ac4744152 --- /dev/null +++ b/lisa-ivl3-2b_nr3_123avg_2_vlorati_sr/ckpt_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db1a043f6bc045cf974611fcd4997116667039e3e8e3bbd482744a3202880a43 +size 4258284552 diff --git a/lisa-ivl3-2b_nr3_123avg_2_vlorati_sr/ckpt_model/training_args.bin b/lisa-ivl3-2b_nr3_123avg_2_vlorati_sr/ckpt_model/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..bfda93469ff454ec33fabdb7d403cc0f0a1d8dad --- /dev/null +++ b/lisa-ivl3-2b_nr3_123avg_2_vlorati_sr/ckpt_model/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc7d203446ecb137f7fd482918b8aae0854bccf6bbce5a905f28994797091b06 +size 7416 diff --git a/lisa-ivl3-2b_nr3_123avg_2_vlorati_sr/evaluation_metrics.json b/lisa-ivl3-2b_nr3_123avg_2_vlorati_sr/evaluation_metrics.json new file mode 100644 index 0000000000000000000000000000000000000000..0fc37ec4ff5ba042efb3f2c3ccf23b70faf4ba8f --- /dev/null +++ b/lisa-ivl3-2b_nr3_123avg_2_vlorati_sr/evaluation_metrics.json @@ -0,0 +1,116 @@ +[ + { + "val_dataset": "ReasonSeg|val", + "epoch": 1.0, + "eval_giou": 0.5375710725784302, + "eval_ciou": 0.611761212348938 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 2.0, + "eval_giou": 0.5579606294631958, + "eval_ciou": 0.6419277787208557 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 3.0, + "eval_giou": 0.559912919998169, + "eval_ciou": 0.6269468069076538 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 4.0, + "eval_giou": 0.583376407623291, + "eval_ciou": 0.6361786127090454 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 5.0, + "eval_giou": 0.5813820958137512, + "eval_ciou": 0.6213558912277222 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 6.0, + "eval_giou": 0.6018838882446289, + "eval_ciou": 0.6259300112724304 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 7.0, + "eval_giou": 0.5978098511695862, + "eval_ciou": 0.6304282546043396 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 8.0, + "eval_giou": 0.6029437184333801, + "eval_ciou": 0.621367871761322 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 9.0, + "eval_giou": 0.6062439680099487, + "eval_ciou": 0.613494336605072 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 10.0, + "eval_giou": 0.6137564778327942, + "eval_ciou": 0.6120806932449341 + }, + { + "val_dataset": "ReasonSeg|test", + "epoch": 10.0, + "eval_giou": 0.5896890163421631, + "eval_ciou": 0.6174530386924744 + }, + { + "val_dataset": "refcoco|unc|val", + "epoch": 10.0, + "eval_giou": 0.7800140976905823, + "eval_ciou": 0.7860043048858643 + }, + { + "val_dataset": "refcoco|unc|testA", + "epoch": 10.0, + "eval_giou": 0.8016083836555481, + "eval_ciou": 0.8077698349952698 + }, + { + "val_dataset": "refcoco|unc|testB", + "epoch": 10.0, + "eval_giou": 0.7549684643745422, + "eval_ciou": 0.7568424940109253 + }, + { + "val_dataset": "refcoco+|unc|val", + "epoch": 10.0, + "eval_giou": 0.7311015725135803, + "eval_ciou": 0.7267637252807617 + }, + { + "val_dataset": "refcoco+|unc|testA", + "epoch": 10.0, + "eval_giou": 0.7750805020332336, + "eval_ciou": 0.7770675420761108 + }, + { + "val_dataset": "refcoco+|unc|testB", + "epoch": 10.0, + "eval_giou": 0.6815102100372314, + "eval_ciou": 0.6722325086593628 + }, + { + "val_dataset": "refcocog|umd|test", + "epoch": 10.0, + "eval_giou": 0.740856945514679, + "eval_ciou": 0.750436544418335 + }, + { + "val_dataset": "refcocog|umd|val", + "epoch": 10.0, + "eval_giou": 0.7399281859397888, + "eval_ciou": 0.751610279083252 + } +] \ No newline at end of file diff --git a/lisa-ivl3-2b_nr3_123avg_2_vlorati_sr/events.out.tfevents.1759397995.bask-pg0308u29a.1025038.0 b/lisa-ivl3-2b_nr3_123avg_2_vlorati_sr/events.out.tfevents.1759397995.bask-pg0308u29a.1025038.0 new file mode 100644 index 0000000000000000000000000000000000000000..4d417a0064e937c5f7618d562bfdc473b0e68f4e --- /dev/null +++ b/lisa-ivl3-2b_nr3_123avg_2_vlorati_sr/events.out.tfevents.1759397995.bask-pg0308u29a.1025038.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2108e268c99ebe3f48cae8ccb374b434274a54a575cde4af54cdba4385f8c859 +size 1282 diff --git a/lisa-ivl3-2b_nr3_123avg_2_vlorati_sr/events.out.tfevents.1759404208.bask-pg0308u29a.1186867.0 b/lisa-ivl3-2b_nr3_123avg_2_vlorati_sr/events.out.tfevents.1759404208.bask-pg0308u29a.1186867.0 new file mode 100644 index 0000000000000000000000000000000000000000..cf9772359337401d63b99f3da8f13c24d9243eee --- /dev/null +++ b/lisa-ivl3-2b_nr3_123avg_2_vlorati_sr/events.out.tfevents.1759404208.bask-pg0308u29a.1186867.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf605e59203f538791d3b0a67310a708aec06025c5aa7eaaaa5aa3fe9bcaf060 +size 212352 diff --git a/lisa-ivl3-2b_nr3_123avg_2_vlorati_sr/events.out.tfevents.1759421771.bask-pg0308u30a.1354705.0 b/lisa-ivl3-2b_nr3_123avg_2_vlorati_sr/events.out.tfevents.1759421771.bask-pg0308u30a.1354705.0 new file mode 100644 index 0000000000000000000000000000000000000000..be6d6dbe68d279821b21e783837aaccb65302267 --- /dev/null +++ b/lisa-ivl3-2b_nr3_123avg_2_vlorati_sr/events.out.tfevents.1759421771.bask-pg0308u30a.1354705.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c6533778b0fbb28b502fbd56d5aeb1ece47c72736d45c9978a37c93fb87c797 +size 88 diff --git a/lisa-ivl3-2b_nr3_123avg_2_vlorati_sr/events.out.tfevents.1759422806.bask-pg0309u06a.1255317.0 b/lisa-ivl3-2b_nr3_123avg_2_vlorati_sr/events.out.tfevents.1759422806.bask-pg0309u06a.1255317.0 new file mode 100644 index 0000000000000000000000000000000000000000..017672dbd1348bc7f07e4ab70b9d7305b94adb5d --- /dev/null +++ b/lisa-ivl3-2b_nr3_123avg_2_vlorati_sr/events.out.tfevents.1759422806.bask-pg0309u06a.1255317.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd4325b16e28f9e3e2ae36b504039fa2b29a84699ecdaaf0016c334284bdf973 +size 88 diff --git a/lisa-ivl3-2b_nr3_123avg_2_vlorati_sr/runs/Oct02_10-39-52_bask-pg0308u29a/events.out.tfevents.1759398068.bask-pg0308u29a.1025038.1 b/lisa-ivl3-2b_nr3_123avg_2_vlorati_sr/runs/Oct02_10-39-52_bask-pg0308u29a/events.out.tfevents.1759398068.bask-pg0308u29a.1025038.1 new file mode 100644 index 0000000000000000000000000000000000000000..f0332b7a8c85266a0eea016fcc09e51fe9bb73d4 --- /dev/null +++ b/lisa-ivl3-2b_nr3_123avg_2_vlorati_sr/runs/Oct02_10-39-52_bask-pg0308u29a/events.out.tfevents.1759398068.bask-pg0308u29a.1025038.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0647e653a042026747a4935e1d1cb29d7cbeae7bfcd867da4b80cda0a3162985 +size 9772 diff --git a/lisa-ivl3-2b_nr3_123avg_2_vlorati_sr/runs/Oct02_12-23-25_bask-pg0308u29a/events.out.tfevents.1759404274.bask-pg0308u29a.1186867.1 b/lisa-ivl3-2b_nr3_123avg_2_vlorati_sr/runs/Oct02_12-23-25_bask-pg0308u29a/events.out.tfevents.1759404274.bask-pg0308u29a.1186867.1 new file mode 100644 index 0000000000000000000000000000000000000000..5607e309bbebacc17b939774fc628fee2fbf01ec --- /dev/null +++ b/lisa-ivl3-2b_nr3_123avg_2_vlorati_sr/runs/Oct02_12-23-25_bask-pg0308u29a/events.out.tfevents.1759404274.bask-pg0308u29a.1186867.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c65cc10cc01bcb9dba5d425ca88dc3234fe72a7dc612b3650f26daafed4f2fce +size 116417 diff --git a/lisa-ivl3-2b_nr3_123avg_2_vlorati_sr/runs/Oct02_12-23-25_bask-pg0308u29a/events.out.tfevents.1759451994.bask-pg0308u29a.1186867.2 b/lisa-ivl3-2b_nr3_123avg_2_vlorati_sr/runs/Oct02_12-23-25_bask-pg0308u29a/events.out.tfevents.1759451994.bask-pg0308u29a.1186867.2 new file mode 100644 index 0000000000000000000000000000000000000000..94f793c24873b8127209dc646236e67100fa8e03 --- /dev/null +++ b/lisa-ivl3-2b_nr3_123avg_2_vlorati_sr/runs/Oct02_12-23-25_bask-pg0308u29a/events.out.tfevents.1759451994.bask-pg0308u29a.1186867.2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c816ac843c1b863d35563a76f94811d44f972e66e03dae3d4dfbcebbaba2d4f +size 1402 diff --git a/lisa-ivl3-2b_nr3_123avg_2_vlorati_sr/runs/Oct02_17-16-08_bask-pg0308u30a/events.out.tfevents.1759421853.bask-pg0308u30a.1354705.1 b/lisa-ivl3-2b_nr3_123avg_2_vlorati_sr/runs/Oct02_17-16-08_bask-pg0308u30a/events.out.tfevents.1759421853.bask-pg0308u30a.1354705.1 new file mode 100644 index 0000000000000000000000000000000000000000..f1591e5845810fed2a075c496ffbce41aeeed4a2 --- /dev/null +++ b/lisa-ivl3-2b_nr3_123avg_2_vlorati_sr/runs/Oct02_17-16-08_bask-pg0308u30a/events.out.tfevents.1759421853.bask-pg0308u30a.1354705.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6163b84c1bf6af487a2834a0f51fde551c647b7f1fbd221df4191b26d2fa409 +size 9151 diff --git a/lisa-ivl3-2b_nr3_123avg_2_vlorati_sr/runs/Oct02_17-33-24_bask-pg0309u06a/events.out.tfevents.1759422888.bask-pg0309u06a.1255317.1 b/lisa-ivl3-2b_nr3_123avg_2_vlorati_sr/runs/Oct02_17-33-24_bask-pg0309u06a/events.out.tfevents.1759422888.bask-pg0309u06a.1255317.1 new file mode 100644 index 0000000000000000000000000000000000000000..713c1940316d91d11ab44a8e9e3d4baa43d1ff58 --- /dev/null +++ b/lisa-ivl3-2b_nr3_123avg_2_vlorati_sr/runs/Oct02_17-33-24_bask-pg0309u06a/events.out.tfevents.1759422888.bask-pg0309u06a.1255317.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f735dfdaad7c556a5cfb7dfa70b76885b1c1b2bd1e18e2f050917fdcf4485a4 +size 9151 diff --git a/lisa-ivl3-2b_nr_vlorati_sr/ckpt_model/config.json b/lisa-ivl3-2b_nr_vlorati_sr/ckpt_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1aaa0efe9e975d906488d234e8f6ec309c18056c --- /dev/null +++ b/lisa-ivl3-2b_nr_vlorati_sr/ckpt_model/config.json @@ -0,0 +1,143 @@ +{ + "architectures": [ + "InternVL3Self" + ], + "auto_map": { + "AutoConfig": "configuration_internvl_chat.InternVLChatConfig", + "AutoModel": "modeling_internvl_chat.InternVLChatModel", + "AutoModelForCausalLM": "modeling_internvl_chat.InternVLChatModel" + }, + "downsample_ratio": 0.5, + "dtype": "bfloat16", + "dynamic_image_size": true, + "eos_token_id": 151645, + "force_image_size": 448, + "hidden_size": 1536, + "image_fold": null, + "llm_config": { + "_attn_implementation_autoset": true, + "_name_or_path": "./pretrained/Qwen2.5-32B-Instruct", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 151643, + "dtype": "bfloat16", + "eos_token_id": 151643, + "hidden_act": "silu", + "hidden_size": 1536, + "initializer_range": 0.02, + "intermediate_size": 8960, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 70, + "model_type": "qwen2", + "moe_config": null, + "num_attention_heads": 12, + "num_hidden_layers": 28, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 2.0, + "rope_type": "dynamic", + "type": "dynamic" + }, + "rope_theta": 1000000.0, + "sliding_window": null, + "use_bfloat16": true, + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 151676 + }, + "max_dynamic_patch": 12, + "min_dynamic_patch": 1, + "model_type": "internvl_chat", + "output_attentions": false, + "pad2square": false, + "pad_token_id": 151643, + "ps_version": "v2", + "select_layer": -1, + "system_message": null, + "template": "internvl2_5", + "tie_word_embeddings": false, + "transformers_version": null, + "use_backbone_lora": 0, + "use_llm_lora": 0, + "use_thumbnail": true, + "vision_config": { + "_attn_implementation_autoset": true, + "_name_or_path": "OpenGVLab/InternViT-6B-448px-V1-5", + "architectures": [ + "InternVisionModel" + ], + "attention_dropout": 0.0, + "auto_map": { + "AutoConfig": "configuration_intern_vit.InternVisionConfig", + "AutoModel": "modeling_intern_vit.InternVisionModel" + }, + "capacity_factor": 1.2, + "drop_path_rate": 0.1, + "dropout": 0.0, + "dtype": "bfloat16", + "eval_capacity_factor": 1.4, + "hidden_act": "gelu", + "hidden_size": 1024, + "image_size": 448, + "initializer_factor": 0.1, + "initializer_range": 1e-10, + "intermediate_size": 4096, + "laux_allreduce": "all_nodes", + "layer_norm_eps": 1e-06, + "model_type": "intern_vit_6b", + "moe_coeff_ratio": 0.5, + "moe_intermediate_size": 768, + "moe_output_scale": 4.0, + "noisy_gate_policy": "RSample_before", + "norm_type": "layer_norm", + "num_attention_heads": 16, + "num_channels": 3, + "num_experts": 8, + "num_hidden_layers": 24, + "num_routed_experts": 4, + "num_shared_experts": 4, + "patch_size": 14, + "qk_normalization": false, + "qkv_bias": true, + "shared_expert_intermediate_size": 3072, + "use_bfloat16": true, + "use_flash_attn": true, + "use_moe": false, + "use_residual": true, + "use_rts": false, + "use_weighted_residual": false + } +} diff --git a/lisa-ivl3-2b_nr_vlorati_sr/ckpt_model/model.safetensors b/lisa-ivl3-2b_nr_vlorati_sr/ckpt_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6b7027d5be1662f3c1898ddd448ba6b46cad60c8 --- /dev/null +++ b/lisa-ivl3-2b_nr_vlorati_sr/ckpt_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ef8c24a02d9cfa60f1b38e4e4e8017ddb86bd0c96646409ffdd3255730ecf9f +size 4244119544 diff --git a/lisa-ivl3-2b_nr_vlorati_sr/ckpt_model/training_args.bin b/lisa-ivl3-2b_nr_vlorati_sr/ckpt_model/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..551cd9614cffad7e18e9e01ea316b3e3ff803310 --- /dev/null +++ b/lisa-ivl3-2b_nr_vlorati_sr/ckpt_model/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1b7ce5384e6ee339d4c5d0c99a822d9246b59db2c443d07638d94e576927aa6 +size 7352 diff --git a/lisa-ivl3-2b_nr_vlorati_sr/evaluation_metrics.json b/lisa-ivl3-2b_nr_vlorati_sr/evaluation_metrics.json new file mode 100644 index 0000000000000000000000000000000000000000..dff332dacacce7e52aa013edba9797a942f938c7 --- /dev/null +++ b/lisa-ivl3-2b_nr_vlorati_sr/evaluation_metrics.json @@ -0,0 +1,74 @@ +[ + { + "val_dataset": "ReasonSeg|val", + "epoch": 1.0, + "eval_giou": 0.5435585379600525, + "eval_ciou": 0.5839300751686096 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 2.0, + "eval_giou": 0.5860790014266968, + "eval_ciou": 0.6421488523483276 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 3.0, + "eval_giou": 0.5787966847419739, + "eval_ciou": 0.600932776927948 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 4.0, + "eval_giou": 0.580905020236969, + "eval_ciou": 0.665267288684845 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 5.0, + "eval_giou": 0.6076521873474121, + "eval_ciou": 0.6994054913520813 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 6.0, + "eval_giou": 0.6189091801643372, + "eval_ciou": 0.6991121172904968 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 7.0, + "eval_giou": 0.6154694557189941, + "eval_ciou": 0.6995888352394104 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 8.0, + "eval_giou": 0.625119149684906, + "eval_ciou": 0.6840076446533203 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 9.0, + "eval_giou": 0.622161865234375, + "eval_ciou": 0.7103497385978699 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 10.0, + "eval_giou": 0.6222892999649048, + "eval_ciou": 0.6995967030525208 + }, + { + "val_dataset": "ReasonSeg|test", + "epoch": 10.0, + "eval_giou": 0.6018536686897278, + "eval_ciou": 0.6307988166809082 + }, + { + "val_dataset": "refcoco|unc|val", + "epoch": 10.0, + "eval_giou": 0.7830573916435242, + "eval_ciou": 0.788460910320282 + } +] \ No newline at end of file diff --git a/lisa-ivl3-2b_nr_vlorati_sr/events.out.tfevents.1759257773.bask-pg0308u18a.3859698.0 b/lisa-ivl3-2b_nr_vlorati_sr/events.out.tfevents.1759257773.bask-pg0308u18a.3859698.0 new file mode 100644 index 0000000000000000000000000000000000000000..f8200fcac5a4a1c88c4af8bc2f1d5bf790162185 --- /dev/null +++ b/lisa-ivl3-2b_nr_vlorati_sr/events.out.tfevents.1759257773.bask-pg0308u18a.3859698.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11d86a40edcfc375aa094b06b5640601773cc450df7a2d9d372a36aca366f9a3 +size 88 diff --git a/lisa-ivl3-2b_nr_vlorati_sr/events.out.tfevents.1759257959.bask-pg0308u18a.3864097.0 b/lisa-ivl3-2b_nr_vlorati_sr/events.out.tfevents.1759257959.bask-pg0308u18a.3864097.0 new file mode 100644 index 0000000000000000000000000000000000000000..870f260b51bc7bc18a97fb1aed16b33060a5a40e --- /dev/null +++ b/lisa-ivl3-2b_nr_vlorati_sr/events.out.tfevents.1759257959.bask-pg0308u18a.3864097.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55dda217e0a0bdcb5bda03534b9748179c5719e62dcdf9e0ee036bd1ca888add +size 88 diff --git a/lisa-ivl3-2b_nr_vlorati_sr/events.out.tfevents.1759258147.bask-pg0308u18a.3867683.0 b/lisa-ivl3-2b_nr_vlorati_sr/events.out.tfevents.1759258147.bask-pg0308u18a.3867683.0 new file mode 100644 index 0000000000000000000000000000000000000000..7bac4c6472e3ac829152571e5ac56f87aa695424 --- /dev/null +++ b/lisa-ivl3-2b_nr_vlorati_sr/events.out.tfevents.1759258147.bask-pg0308u18a.3867683.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:583fd019c4bf65efed1dda3940580ccc9e0337b44c9ecde78029b56d9878c7a6 +size 88 diff --git a/lisa-ivl3-2b_nr_vlorati_sr/events.out.tfevents.1759258293.bask-pg0308u18a.3870864.0 b/lisa-ivl3-2b_nr_vlorati_sr/events.out.tfevents.1759258293.bask-pg0308u18a.3870864.0 new file mode 100644 index 0000000000000000000000000000000000000000..ae31e7374c9db2b85d1ee7a641f824fd51942e43 --- /dev/null +++ b/lisa-ivl3-2b_nr_vlorati_sr/events.out.tfevents.1759258293.bask-pg0308u18a.3870864.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:666fbe9c2d6973ebfaf32dd2cd2e700cc9a2eb0dd4576792f5d3a9a3040bad2e +size 88 diff --git a/lisa-ivl3-2b_nr_vlorati_sr/events.out.tfevents.1759258399.bask-pg0308u18a.3873379.0 b/lisa-ivl3-2b_nr_vlorati_sr/events.out.tfevents.1759258399.bask-pg0308u18a.3873379.0 new file mode 100644 index 0000000000000000000000000000000000000000..29b4b5f5e3d9c6d1695486c113aab76edeabd326 --- /dev/null +++ b/lisa-ivl3-2b_nr_vlorati_sr/events.out.tfevents.1759258399.bask-pg0308u18a.3873379.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:474723706162554468f38780653ad45bf9191176cc64c91b54616708dc1f8fe2 +size 88 diff --git a/lisa-ivl3-2b_nr_vlorati_sr/events.out.tfevents.1759258641.bask-pg0308u18a.3877979.0 b/lisa-ivl3-2b_nr_vlorati_sr/events.out.tfevents.1759258641.bask-pg0308u18a.3877979.0 new file mode 100644 index 0000000000000000000000000000000000000000..195175cbe5edd0240035521ea06387d15bc2b76a --- /dev/null +++ b/lisa-ivl3-2b_nr_vlorati_sr/events.out.tfevents.1759258641.bask-pg0308u18a.3877979.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33e30de79347ed63eea5edaf19a9b8e572dbaa0fb0fab14e810859b3cef1e9f9 +size 88 diff --git a/lisa-ivl3-2b_nr_vlorati_sr/events.out.tfevents.1759258988.bask-pg0308u18a.3884156.0 b/lisa-ivl3-2b_nr_vlorati_sr/events.out.tfevents.1759258988.bask-pg0308u18a.3884156.0 new file mode 100644 index 0000000000000000000000000000000000000000..dad86ebd15a00d1719235aaa1d534fd6f2f73584 --- /dev/null +++ b/lisa-ivl3-2b_nr_vlorati_sr/events.out.tfevents.1759258988.bask-pg0308u18a.3884156.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:354304e66289384c002ccc3ce5d23674aca62cdb2164fb79da5bb7220eda4f59 +size 208873 diff --git a/lisa-ivl3-2b_nr_vlorati_sr/runs/Sep30_19-42-49_bask-pg0308u18a/events.out.tfevents.1759257855.bask-pg0308u18a.3859698.1 b/lisa-ivl3-2b_nr_vlorati_sr/runs/Sep30_19-42-49_bask-pg0308u18a/events.out.tfevents.1759257855.bask-pg0308u18a.3859698.1 new file mode 100644 index 0000000000000000000000000000000000000000..ef816236ad5d74b36f647e683b672742d67a2600 --- /dev/null +++ b/lisa-ivl3-2b_nr_vlorati_sr/runs/Sep30_19-42-49_bask-pg0308u18a/events.out.tfevents.1759257855.bask-pg0308u18a.3859698.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6fe1d60dc1099be855026cbb8e9efbbe8d5bf382e992b834b1d3647322b2a03e +size 9131 diff --git a/lisa-ivl3-2b_nr_vlorati_sr/runs/Sep30_19-45-56_bask-pg0308u18a/events.out.tfevents.1759258034.bask-pg0308u18a.3864097.1 b/lisa-ivl3-2b_nr_vlorati_sr/runs/Sep30_19-45-56_bask-pg0308u18a/events.out.tfevents.1759258034.bask-pg0308u18a.3864097.1 new file mode 100644 index 0000000000000000000000000000000000000000..8095fd7f12df17ccad38f1d697f948847f41ac3d --- /dev/null +++ b/lisa-ivl3-2b_nr_vlorati_sr/runs/Sep30_19-45-56_bask-pg0308u18a/events.out.tfevents.1759258034.bask-pg0308u18a.3864097.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d054e5b953273b29255daf685564fbe429023610e95b0cd3a2d646c68c086a5b +size 9131 diff --git a/lisa-ivl3-2b_nr_vlorati_sr/runs/Sep30_19-49-03_bask-pg0308u18a/events.out.tfevents.1759258180.bask-pg0308u18a.3867683.1 b/lisa-ivl3-2b_nr_vlorati_sr/runs/Sep30_19-49-03_bask-pg0308u18a/events.out.tfevents.1759258180.bask-pg0308u18a.3867683.1 new file mode 100644 index 0000000000000000000000000000000000000000..b554de92de7a78da963213215dd7ac8a382e0b66 --- /dev/null +++ b/lisa-ivl3-2b_nr_vlorati_sr/runs/Sep30_19-49-03_bask-pg0308u18a/events.out.tfevents.1759258180.bask-pg0308u18a.3867683.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1de342cf256b1f7451c41c65093dd9658317988ba5ab02ef1ebeea0b07ad974c +size 9131 diff --git a/lisa-ivl3-2b_nr_vlorati_sr/runs/Sep30_19-51-30_bask-pg0308u18a/events.out.tfevents.1759258323.bask-pg0308u18a.3870864.1 b/lisa-ivl3-2b_nr_vlorati_sr/runs/Sep30_19-51-30_bask-pg0308u18a/events.out.tfevents.1759258323.bask-pg0308u18a.3870864.1 new file mode 100644 index 0000000000000000000000000000000000000000..db8aec65b656b9b0db038cc79c1ccc543a95beb5 --- /dev/null +++ b/lisa-ivl3-2b_nr_vlorati_sr/runs/Sep30_19-51-30_bask-pg0308u18a/events.out.tfevents.1759258323.bask-pg0308u18a.3870864.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9628d266f6ae57c5fb7469d827a8cf94381f1fe0f29d3f5763a71adb3dd7839a +size 9131 diff --git a/lisa-ivl3-2b_nr_vlorati_sr/runs/Sep30_19-53-17_bask-pg0308u18a/events.out.tfevents.1759258429.bask-pg0308u18a.3873379.1 b/lisa-ivl3-2b_nr_vlorati_sr/runs/Sep30_19-53-17_bask-pg0308u18a/events.out.tfevents.1759258429.bask-pg0308u18a.3873379.1 new file mode 100644 index 0000000000000000000000000000000000000000..544d5bf512460032a21461060fc187cd4bf9cb11 --- /dev/null +++ b/lisa-ivl3-2b_nr_vlorati_sr/runs/Sep30_19-53-17_bask-pg0308u18a/events.out.tfevents.1759258429.bask-pg0308u18a.3873379.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:505792d87a81e94ac5fb73b5f4b32648551282c3700aa1f97421342a2eb0f920 +size 9131 diff --git a/lisa-ivl3-2b_nr_vlorati_sr/runs/Sep30_19-57-18_bask-pg0308u18a/events.out.tfevents.1759258725.bask-pg0308u18a.3877979.1 b/lisa-ivl3-2b_nr_vlorati_sr/runs/Sep30_19-57-18_bask-pg0308u18a/events.out.tfevents.1759258725.bask-pg0308u18a.3877979.1 new file mode 100644 index 0000000000000000000000000000000000000000..57aa9c61901364a34709f67bbd7ed05326a6d6a5 --- /dev/null +++ b/lisa-ivl3-2b_nr_vlorati_sr/runs/Sep30_19-57-18_bask-pg0308u18a/events.out.tfevents.1759258725.bask-pg0308u18a.3877979.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d3eb9849bdad4ff1595520d72e6fc6d0cdd5652339a7f21fc6dd11e09cf5942 +size 9131 diff --git a/lisa-ivl3-2b_nr_vlorati_sr/runs/Sep30_20-03-05_bask-pg0308u18a/events.out.tfevents.1759259062.bask-pg0308u18a.3884156.1 b/lisa-ivl3-2b_nr_vlorati_sr/runs/Sep30_20-03-05_bask-pg0308u18a/events.out.tfevents.1759259062.bask-pg0308u18a.3884156.1 new file mode 100644 index 0000000000000000000000000000000000000000..dcb53e14446c9390f0b3dc0fdf0a56ab9ebd219b --- /dev/null +++ b/lisa-ivl3-2b_nr_vlorati_sr/runs/Sep30_20-03-05_bask-pg0308u18a/events.out.tfevents.1759259062.bask-pg0308u18a.3884156.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eefec0da7595b7fcbe7110235ad8fbf7b51b89fb1a52915c09928399fc77f9e2 +size 116397 diff --git a/lisa-ivl3-2b_nr_vlorati_sr/runs/Sep30_20-03-05_bask-pg0308u18a/events.out.tfevents.1759306140.bask-pg0308u18a.3884156.2 b/lisa-ivl3-2b_nr_vlorati_sr/runs/Sep30_20-03-05_bask-pg0308u18a/events.out.tfevents.1759306140.bask-pg0308u18a.3884156.2 new file mode 100644 index 0000000000000000000000000000000000000000..5d860fef6f19b5667671597749eacf4ff8f3ccd2 --- /dev/null +++ b/lisa-ivl3-2b_nr_vlorati_sr/runs/Sep30_20-03-05_bask-pg0308u18a/events.out.tfevents.1759306140.bask-pg0308u18a.3884156.2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0bff8f683fe40e2f8d802eec5b0db1d20096a518159c046be5f0dfb1f2793541 +size 380 diff --git a/lisa-ivl3-2b_nrs3_vlorati_sr/ckpt_model/config.json b/lisa-ivl3-2b_nrs3_vlorati_sr/ckpt_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1aaa0efe9e975d906488d234e8f6ec309c18056c --- /dev/null +++ b/lisa-ivl3-2b_nrs3_vlorati_sr/ckpt_model/config.json @@ -0,0 +1,143 @@ +{ + "architectures": [ + "InternVL3Self" + ], + "auto_map": { + "AutoConfig": "configuration_internvl_chat.InternVLChatConfig", + "AutoModel": "modeling_internvl_chat.InternVLChatModel", + "AutoModelForCausalLM": "modeling_internvl_chat.InternVLChatModel" + }, + "downsample_ratio": 0.5, + "dtype": "bfloat16", + "dynamic_image_size": true, + "eos_token_id": 151645, + "force_image_size": 448, + "hidden_size": 1536, + "image_fold": null, + "llm_config": { + "_attn_implementation_autoset": true, + "_name_or_path": "./pretrained/Qwen2.5-32B-Instruct", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 151643, + "dtype": "bfloat16", + "eos_token_id": 151643, + "hidden_act": "silu", + "hidden_size": 1536, + "initializer_range": 0.02, + "intermediate_size": 8960, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 70, + "model_type": "qwen2", + "moe_config": null, + "num_attention_heads": 12, + "num_hidden_layers": 28, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 2.0, + "rope_type": "dynamic", + "type": "dynamic" + }, + "rope_theta": 1000000.0, + "sliding_window": null, + "use_bfloat16": true, + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 151676 + }, + "max_dynamic_patch": 12, + "min_dynamic_patch": 1, + "model_type": "internvl_chat", + "output_attentions": false, + "pad2square": false, + "pad_token_id": 151643, + "ps_version": "v2", + "select_layer": -1, + "system_message": null, + "template": "internvl2_5", + "tie_word_embeddings": false, + "transformers_version": null, + "use_backbone_lora": 0, + "use_llm_lora": 0, + "use_thumbnail": true, + "vision_config": { + "_attn_implementation_autoset": true, + "_name_or_path": "OpenGVLab/InternViT-6B-448px-V1-5", + "architectures": [ + "InternVisionModel" + ], + "attention_dropout": 0.0, + "auto_map": { + "AutoConfig": "configuration_intern_vit.InternVisionConfig", + "AutoModel": "modeling_intern_vit.InternVisionModel" + }, + "capacity_factor": 1.2, + "drop_path_rate": 0.1, + "dropout": 0.0, + "dtype": "bfloat16", + "eval_capacity_factor": 1.4, + "hidden_act": "gelu", + "hidden_size": 1024, + "image_size": 448, + "initializer_factor": 0.1, + "initializer_range": 1e-10, + "intermediate_size": 4096, + "laux_allreduce": "all_nodes", + "layer_norm_eps": 1e-06, + "model_type": "intern_vit_6b", + "moe_coeff_ratio": 0.5, + "moe_intermediate_size": 768, + "moe_output_scale": 4.0, + "noisy_gate_policy": "RSample_before", + "norm_type": "layer_norm", + "num_attention_heads": 16, + "num_channels": 3, + "num_experts": 8, + "num_hidden_layers": 24, + "num_routed_experts": 4, + "num_shared_experts": 4, + "patch_size": 14, + "qk_normalization": false, + "qkv_bias": true, + "shared_expert_intermediate_size": 3072, + "use_bfloat16": true, + "use_flash_attn": true, + "use_moe": false, + "use_residual": true, + "use_rts": false, + "use_weighted_residual": false + } +} diff --git a/lisa-ivl3-2b_nrs3_vlorati_sr/ckpt_model/model.safetensors b/lisa-ivl3-2b_nrs3_vlorati_sr/ckpt_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0571d17aeb83f0ff1f6cedff359698facccf84fd --- /dev/null +++ b/lisa-ivl3-2b_nrs3_vlorati_sr/ckpt_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa5894ee8cd522b89f20bad5fc50fa95367e856852fd1ebabe5ba759d5deb2d4 +size 4211067080 diff --git a/lisa-ivl3-2b_nrs3_vlorati_sr/ckpt_model/training_args.bin b/lisa-ivl3-2b_nrs3_vlorati_sr/ckpt_model/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b8da4a658da91ec2ee5e772b928899306f01d7e8 --- /dev/null +++ b/lisa-ivl3-2b_nrs3_vlorati_sr/ckpt_model/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75723d64e2df17f4e6a06cd914ad4b6561fbe8b5ed1fd08414a06b3a5ea81314 +size 7352 diff --git a/lisa-ivl3-2b_nrs3_vlorati_sr/evaluation_metrics.json b/lisa-ivl3-2b_nrs3_vlorati_sr/evaluation_metrics.json new file mode 100644 index 0000000000000000000000000000000000000000..87592da892495b9054f484173075f4b9e382a618 --- /dev/null +++ b/lisa-ivl3-2b_nrs3_vlorati_sr/evaluation_metrics.json @@ -0,0 +1,116 @@ +[ + { + "val_dataset": "ReasonSeg|val", + "epoch": 1.0, + "eval_giou": 0.5293242931365967, + "eval_ciou": 0.6307805776596069 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 2.0, + "eval_giou": 0.5570892691612244, + "eval_ciou": 0.6712872982025146 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 3.0, + "eval_giou": 0.5568661689758301, + "eval_ciou": 0.6484045386314392 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 4.0, + "eval_giou": 0.5700838565826416, + "eval_ciou": 0.6760490536689758 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 5.0, + "eval_giou": 0.5907701849937439, + "eval_ciou": 0.6790520548820496 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 6.0, + "eval_giou": 0.5843024253845215, + "eval_ciou": 0.6478078961372375 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 7.0, + "eval_giou": 0.6035993695259094, + "eval_ciou": 0.7039260864257812 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 8.0, + "eval_giou": 0.5898379683494568, + "eval_ciou": 0.6766417026519775 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 9.0, + "eval_giou": 0.5942458510398865, + "eval_ciou": 0.6856851577758789 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 10.0, + "eval_giou": 0.5963261723518372, + "eval_ciou": 0.6877495050430298 + }, + { + "val_dataset": "ReasonSeg|test", + "epoch": 10.0, + "eval_giou": 0.5885507464408875, + "eval_ciou": 0.6219065189361572 + }, + { + "val_dataset": "refcoco|unc|val", + "epoch": 10.0, + "eval_giou": 0.780586302280426, + "eval_ciou": 0.7879629135131836 + }, + { + "val_dataset": "refcoco|unc|testA", + "epoch": 10.0, + "eval_giou": 0.8040649890899658, + "eval_ciou": 0.811086893081665 + }, + { + "val_dataset": "refcoco|unc|testB", + "epoch": 10.0, + "eval_giou": 0.7571799159049988, + "eval_ciou": 0.7596796751022339 + }, + { + "val_dataset": "refcoco+|unc|val", + "epoch": 10.0, + "eval_giou": 0.7277324795722961, + "eval_ciou": 0.7248367071151733 + }, + { + "val_dataset": "refcoco+|unc|testA", + "epoch": 10.0, + "eval_giou": 0.7701988816261292, + "eval_ciou": 0.77016282081604 + }, + { + "val_dataset": "refcoco+|unc|testB", + "epoch": 10.0, + "eval_giou": 0.6808095574378967, + "eval_ciou": 0.6709620952606201 + }, + { + "val_dataset": "refcocog|umd|test", + "epoch": 10.0, + "eval_giou": 0.7444640398025513, + "eval_ciou": 0.7554171681404114 + }, + { + "val_dataset": "refcocog|umd|val", + "epoch": 10.0, + "eval_giou": 0.738921046257019, + "eval_ciou": 0.7456541061401367 + } +] \ No newline at end of file diff --git a/lisa-ivl3-2b_nrs3_vlorati_sr/events.out.tfevents.1759277309.bask-pg0308u25a.2556480.0 b/lisa-ivl3-2b_nrs3_vlorati_sr/events.out.tfevents.1759277309.bask-pg0308u25a.2556480.0 new file mode 100644 index 0000000000000000000000000000000000000000..331428bbd791b6dab58a25a2ea3fef9dd0a14caa --- /dev/null +++ b/lisa-ivl3-2b_nrs3_vlorati_sr/events.out.tfevents.1759277309.bask-pg0308u25a.2556480.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:775ea2a508135ecd59001fa1c6038988bffd1c1138da06b4e05661e001dc223e +size 212352 diff --git a/lisa-ivl3-2b_nrs3_vlorati_sr/runs/Oct01_01-08-26_bask-pg0308u25a/events.out.tfevents.1759288193.bask-pg0308u25a.2556480.1 b/lisa-ivl3-2b_nrs3_vlorati_sr/runs/Oct01_01-08-26_bask-pg0308u25a/events.out.tfevents.1759288193.bask-pg0308u25a.2556480.1 new file mode 100644 index 0000000000000000000000000000000000000000..0b4b026105fe166167897e2f40a1a775e628f84f --- /dev/null +++ b/lisa-ivl3-2b_nrs3_vlorati_sr/runs/Oct01_01-08-26_bask-pg0308u25a/events.out.tfevents.1759288193.bask-pg0308u25a.2556480.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d553c2e70139b3b09f0b1c0610c3c3611c22360ed9aff4f6fa789f6f1e69c74a +size 116398 diff --git a/lisa-ivl3-2b_nrs3_vlorati_sr/runs/Oct01_01-08-26_bask-pg0308u25a/events.out.tfevents.1759314665.bask-pg0308u25a.2556480.2 b/lisa-ivl3-2b_nrs3_vlorati_sr/runs/Oct01_01-08-26_bask-pg0308u25a/events.out.tfevents.1759314665.bask-pg0308u25a.2556480.2 new file mode 100644 index 0000000000000000000000000000000000000000..0e47c7d3323a7bac0d7a6c296ddf1855ad199f18 --- /dev/null +++ b/lisa-ivl3-2b_nrs3_vlorati_sr/runs/Oct01_01-08-26_bask-pg0308u25a/events.out.tfevents.1759314665.bask-pg0308u25a.2556480.2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce0bd34c2ff808a53c2a0d0fc847e4b958b762bade7ea8df7cbab2f4be076d3d +size 1402 diff --git a/lisa-ivl3-2b_nrs3mlp_vlorati_sr/ckpt_model/config.json b/lisa-ivl3-2b_nrs3mlp_vlorati_sr/ckpt_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1aaa0efe9e975d906488d234e8f6ec309c18056c --- /dev/null +++ b/lisa-ivl3-2b_nrs3mlp_vlorati_sr/ckpt_model/config.json @@ -0,0 +1,143 @@ +{ + "architectures": [ + "InternVL3Self" + ], + "auto_map": { + "AutoConfig": "configuration_internvl_chat.InternVLChatConfig", + "AutoModel": "modeling_internvl_chat.InternVLChatModel", + "AutoModelForCausalLM": "modeling_internvl_chat.InternVLChatModel" + }, + "downsample_ratio": 0.5, + "dtype": "bfloat16", + "dynamic_image_size": true, + "eos_token_id": 151645, + "force_image_size": 448, + "hidden_size": 1536, + "image_fold": null, + "llm_config": { + "_attn_implementation_autoset": true, + "_name_or_path": "./pretrained/Qwen2.5-32B-Instruct", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 151643, + "dtype": "bfloat16", + "eos_token_id": 151643, + "hidden_act": "silu", + "hidden_size": 1536, + "initializer_range": 0.02, + "intermediate_size": 8960, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 70, + "model_type": "qwen2", + "moe_config": null, + "num_attention_heads": 12, + "num_hidden_layers": 28, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 2.0, + "rope_type": "dynamic", + "type": "dynamic" + }, + "rope_theta": 1000000.0, + "sliding_window": null, + "use_bfloat16": true, + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 151676 + }, + "max_dynamic_patch": 12, + "min_dynamic_patch": 1, + "model_type": "internvl_chat", + "output_attentions": false, + "pad2square": false, + "pad_token_id": 151643, + "ps_version": "v2", + "select_layer": -1, + "system_message": null, + "template": "internvl2_5", + "tie_word_embeddings": false, + "transformers_version": null, + "use_backbone_lora": 0, + "use_llm_lora": 0, + "use_thumbnail": true, + "vision_config": { + "_attn_implementation_autoset": true, + "_name_or_path": "OpenGVLab/InternViT-6B-448px-V1-5", + "architectures": [ + "InternVisionModel" + ], + "attention_dropout": 0.0, + "auto_map": { + "AutoConfig": "configuration_intern_vit.InternVisionConfig", + "AutoModel": "modeling_intern_vit.InternVisionModel" + }, + "capacity_factor": 1.2, + "drop_path_rate": 0.1, + "dropout": 0.0, + "dtype": "bfloat16", + "eval_capacity_factor": 1.4, + "hidden_act": "gelu", + "hidden_size": 1024, + "image_size": 448, + "initializer_factor": 0.1, + "initializer_range": 1e-10, + "intermediate_size": 4096, + "laux_allreduce": "all_nodes", + "layer_norm_eps": 1e-06, + "model_type": "intern_vit_6b", + "moe_coeff_ratio": 0.5, + "moe_intermediate_size": 768, + "moe_output_scale": 4.0, + "noisy_gate_policy": "RSample_before", + "norm_type": "layer_norm", + "num_attention_heads": 16, + "num_channels": 3, + "num_experts": 8, + "num_hidden_layers": 24, + "num_routed_experts": 4, + "num_shared_experts": 4, + "patch_size": 14, + "qk_normalization": false, + "qkv_bias": true, + "shared_expert_intermediate_size": 3072, + "use_bfloat16": true, + "use_flash_attn": true, + "use_moe": false, + "use_residual": true, + "use_rts": false, + "use_weighted_residual": false + } +} diff --git a/lisa-ivl3-2b_nrs3mlp_vlorati_sr/ckpt_model/model.safetensors b/lisa-ivl3-2b_nrs3mlp_vlorati_sr/ckpt_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dbd297fef3e1129a804d359f0ba6fe7187f261f7 --- /dev/null +++ b/lisa-ivl3-2b_nrs3mlp_vlorati_sr/ckpt_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a995fe12d2f9108b80f10dcd98ac6711dbcd30e922357c56934eb194e7fb856 +size 4220510808 diff --git a/lisa-ivl3-2b_nrs3mlp_vlorati_sr/ckpt_model/training_args.bin b/lisa-ivl3-2b_nrs3mlp_vlorati_sr/ckpt_model/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..5b30d3fa8ee6e206392211fdd1574ca894f9a5e5 --- /dev/null +++ b/lisa-ivl3-2b_nrs3mlp_vlorati_sr/ckpt_model/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61768640487dab783b78249a5423c55e0b3a59bfeeeec92fda115677c6d27466 +size 7352 diff --git a/lisa-ivl3-2b_nrs3mlp_vlorati_sr/evaluation_metrics.json b/lisa-ivl3-2b_nrs3mlp_vlorati_sr/evaluation_metrics.json new file mode 100644 index 0000000000000000000000000000000000000000..a4b57a186cf1f63e3a7cfe61cd9d950788729753 --- /dev/null +++ b/lisa-ivl3-2b_nrs3mlp_vlorati_sr/evaluation_metrics.json @@ -0,0 +1,116 @@ +[ + { + "val_dataset": "ReasonSeg|val", + "epoch": 1.0, + "eval_giou": 0.5428761839866638, + "eval_ciou": 0.6181703805923462 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 2.0, + "eval_giou": 0.5527018904685974, + "eval_ciou": 0.6565988063812256 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 3.0, + "eval_giou": 0.579323947429657, + "eval_ciou": 0.6545907258987427 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 4.0, + "eval_giou": 0.5768314003944397, + "eval_ciou": 0.6615044474601746 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 5.0, + "eval_giou": 0.5923226475715637, + "eval_ciou": 0.6605075597763062 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 6.0, + "eval_giou": 0.5936264395713806, + "eval_ciou": 0.6160914301872253 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 7.0, + "eval_giou": 0.6023526787757874, + "eval_ciou": 0.6594382524490356 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 8.0, + "eval_giou": 0.6039935350418091, + "eval_ciou": 0.6802769899368286 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 9.0, + "eval_giou": 0.6117456555366516, + "eval_ciou": 0.629892110824585 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 10.0, + "eval_giou": 0.6143592596054077, + "eval_ciou": 0.6324818730354309 + }, + { + "val_dataset": "ReasonSeg|test", + "epoch": 10.0, + "eval_giou": 0.6062642931938171, + "eval_ciou": 0.6165124177932739 + }, + { + "val_dataset": "refcoco|unc|val", + "epoch": 10.0, + "eval_giou": 0.787803053855896, + "eval_ciou": 0.7922309637069702 + }, + { + "val_dataset": "refcoco|unc|testA", + "epoch": 10.0, + "eval_giou": 0.809651255607605, + "eval_ciou": 0.8142685890197754 + }, + { + "val_dataset": "refcoco|unc|testB", + "epoch": 10.0, + "eval_giou": 0.7628293633460999, + "eval_ciou": 0.7653301358222961 + }, + { + "val_dataset": "refcoco+|unc|val", + "epoch": 10.0, + "eval_giou": 0.7381255030632019, + "eval_ciou": 0.731689989566803 + }, + { + "val_dataset": "refcoco+|unc|testA", + "epoch": 10.0, + "eval_giou": 0.7843396663665771, + "eval_ciou": 0.784882128238678 + }, + { + "val_dataset": "refcoco+|unc|testB", + "epoch": 10.0, + "eval_giou": 0.6864758729934692, + "eval_ciou": 0.672863245010376 + }, + { + "val_dataset": "refcocog|umd|test", + "epoch": 10.0, + "eval_giou": 0.7508234977722168, + "eval_ciou": 0.7586160898208618 + }, + { + "val_dataset": "refcocog|umd|val", + "epoch": 10.0, + "eval_giou": 0.7460119724273682, + "eval_ciou": 0.7498002052307129 + } +] \ No newline at end of file diff --git a/lisa-ivl3-2b_nrs3mlp_vlorati_sr/events.out.tfevents.1759312909.bask-pg0309u05a.1556663.0 b/lisa-ivl3-2b_nrs3mlp_vlorati_sr/events.out.tfevents.1759312909.bask-pg0309u05a.1556663.0 new file mode 100644 index 0000000000000000000000000000000000000000..3eb5172c82009b0c1ae11390b1cba38ea63f2b0a --- /dev/null +++ b/lisa-ivl3-2b_nrs3mlp_vlorati_sr/events.out.tfevents.1759312909.bask-pg0309u05a.1556663.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52ef9a15c80435b741241b3847b2b0af1f9a0d76cff400cafdb3abfd9e075b13 +size 20254 diff --git a/lisa-ivl3-2b_nrs3mlp_vlorati_sr/events.out.tfevents.1759316405.bask-pg0309u05a.1615150.0 b/lisa-ivl3-2b_nrs3mlp_vlorati_sr/events.out.tfevents.1759316405.bask-pg0309u05a.1615150.0 new file mode 100644 index 0000000000000000000000000000000000000000..4aabb597c636f60ef9855174b41d05b4a0b34f28 --- /dev/null +++ b/lisa-ivl3-2b_nrs3mlp_vlorati_sr/events.out.tfevents.1759316405.bask-pg0309u05a.1615150.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58900692ac198a6c4ae84994a68d504bff23754f1cf75c0eec352e1374300d26 +size 88 diff --git a/lisa-ivl3-2b_nrs3mlp_vlorati_sr/events.out.tfevents.1759316568.bask-pg0309u05a.1620492.0 b/lisa-ivl3-2b_nrs3mlp_vlorati_sr/events.out.tfevents.1759316568.bask-pg0309u05a.1620492.0 new file mode 100644 index 0000000000000000000000000000000000000000..bca058d0f07bb1e20e7a0c85cb3d47af15ec3990 --- /dev/null +++ b/lisa-ivl3-2b_nrs3mlp_vlorati_sr/events.out.tfevents.1759316568.bask-pg0309u05a.1620492.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9bba43faa37ea1b7428aed8d3acdd30786f2589f2a81b7c44cffe1977a79266a +size 212352 diff --git a/lisa-ivl3-2b_nrs3mlp_vlorati_sr/runs/Oct01_11-01-44_bask-pg0309u05a/events.out.tfevents.1759312990.bask-pg0309u05a.1556663.1 b/lisa-ivl3-2b_nrs3mlp_vlorati_sr/runs/Oct01_11-01-44_bask-pg0309u05a/events.out.tfevents.1759312990.bask-pg0309u05a.1556663.1 new file mode 100644 index 0000000000000000000000000000000000000000..51695152ff29f7c5e9498766ea12b93ca800e825 --- /dev/null +++ b/lisa-ivl3-2b_nrs3mlp_vlorati_sr/runs/Oct01_11-01-44_bask-pg0309u05a/events.out.tfevents.1759312990.bask-pg0309u05a.1556663.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7843fe4d31985267c9b1f41494647bacbb63d1d3937f3386e65273b1aaf4099 +size 19640 diff --git a/lisa-ivl3-2b_nrs3mlp_vlorati_sr/runs/Oct01_12-00-01_bask-pg0309u05a/events.out.tfevents.1759316487.bask-pg0309u05a.1615150.1 b/lisa-ivl3-2b_nrs3mlp_vlorati_sr/runs/Oct01_12-00-01_bask-pg0309u05a/events.out.tfevents.1759316487.bask-pg0309u05a.1615150.1 new file mode 100644 index 0000000000000000000000000000000000000000..41fbf9c3d7f75429ea8ea2386e0685e0f4c1a8e8 --- /dev/null +++ b/lisa-ivl3-2b_nrs3mlp_vlorati_sr/runs/Oct01_12-00-01_bask-pg0309u05a/events.out.tfevents.1759316487.bask-pg0309u05a.1615150.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7585305ed9c30aef459a52c3235756bf9e7d06c09f75c1c6c0f4ff46d3e00a08 +size 9132 diff --git a/lisa-ivl3-2b_nrs3mlp_vlorati_sr/runs/Oct01_12-02-46_bask-pg0309u05a/events.out.tfevents.1759316634.bask-pg0309u05a.1620492.1 b/lisa-ivl3-2b_nrs3mlp_vlorati_sr/runs/Oct01_12-02-46_bask-pg0309u05a/events.out.tfevents.1759316634.bask-pg0309u05a.1620492.1 new file mode 100644 index 0000000000000000000000000000000000000000..aa0bb5e3977049e1cab0539786e7ed2e27e8f5aa --- /dev/null +++ b/lisa-ivl3-2b_nrs3mlp_vlorati_sr/runs/Oct01_12-02-46_bask-pg0309u05a/events.out.tfevents.1759316634.bask-pg0309u05a.1620492.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95b8ce9a9059de52b2276b21a84d2f99afc707965fa6eaf203293c841cae3a9f +size 116404 diff --git a/lisa-ivl3-2b_nrs3mlp_vlorati_sr/runs/Oct01_12-02-46_bask-pg0309u05a/events.out.tfevents.1759341868.bask-pg0309u05a.1620492.2 b/lisa-ivl3-2b_nrs3mlp_vlorati_sr/runs/Oct01_12-02-46_bask-pg0309u05a/events.out.tfevents.1759341868.bask-pg0309u05a.1620492.2 new file mode 100644 index 0000000000000000000000000000000000000000..4013243e577d9eab9fe0d97c72e769eea6a28d31 --- /dev/null +++ b/lisa-ivl3-2b_nrs3mlp_vlorati_sr/runs/Oct01_12-02-46_bask-pg0309u05a/events.out.tfevents.1759341868.bask-pg0309u05a.1620492.2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a24b77047c211cf816a5d21537b9ab8200ce2b4e7e97b6b3fa422b8692e182c9 +size 1402 diff --git a/lisa-ivl3-2b_sm_vlorati_sr/ckpt_model/config.json b/lisa-ivl3-2b_sm_vlorati_sr/ckpt_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1aaa0efe9e975d906488d234e8f6ec309c18056c --- /dev/null +++ b/lisa-ivl3-2b_sm_vlorati_sr/ckpt_model/config.json @@ -0,0 +1,143 @@ +{ + "architectures": [ + "InternVL3Self" + ], + "auto_map": { + "AutoConfig": "configuration_internvl_chat.InternVLChatConfig", + "AutoModel": "modeling_internvl_chat.InternVLChatModel", + "AutoModelForCausalLM": "modeling_internvl_chat.InternVLChatModel" + }, + "downsample_ratio": 0.5, + "dtype": "bfloat16", + "dynamic_image_size": true, + "eos_token_id": 151645, + "force_image_size": 448, + "hidden_size": 1536, + "image_fold": null, + "llm_config": { + "_attn_implementation_autoset": true, + "_name_or_path": "./pretrained/Qwen2.5-32B-Instruct", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 151643, + "dtype": "bfloat16", + "eos_token_id": 151643, + "hidden_act": "silu", + "hidden_size": 1536, + "initializer_range": 0.02, + "intermediate_size": 8960, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 70, + "model_type": "qwen2", + "moe_config": null, + "num_attention_heads": 12, + "num_hidden_layers": 28, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 2.0, + "rope_type": "dynamic", + "type": "dynamic" + }, + "rope_theta": 1000000.0, + "sliding_window": null, + "use_bfloat16": true, + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 151676 + }, + "max_dynamic_patch": 12, + "min_dynamic_patch": 1, + "model_type": "internvl_chat", + "output_attentions": false, + "pad2square": false, + "pad_token_id": 151643, + "ps_version": "v2", + "select_layer": -1, + "system_message": null, + "template": "internvl2_5", + "tie_word_embeddings": false, + "transformers_version": null, + "use_backbone_lora": 0, + "use_llm_lora": 0, + "use_thumbnail": true, + "vision_config": { + "_attn_implementation_autoset": true, + "_name_or_path": "OpenGVLab/InternViT-6B-448px-V1-5", + "architectures": [ + "InternVisionModel" + ], + "attention_dropout": 0.0, + "auto_map": { + "AutoConfig": "configuration_intern_vit.InternVisionConfig", + "AutoModel": "modeling_intern_vit.InternVisionModel" + }, + "capacity_factor": 1.2, + "drop_path_rate": 0.1, + "dropout": 0.0, + "dtype": "bfloat16", + "eval_capacity_factor": 1.4, + "hidden_act": "gelu", + "hidden_size": 1024, + "image_size": 448, + "initializer_factor": 0.1, + "initializer_range": 1e-10, + "intermediate_size": 4096, + "laux_allreduce": "all_nodes", + "layer_norm_eps": 1e-06, + "model_type": "intern_vit_6b", + "moe_coeff_ratio": 0.5, + "moe_intermediate_size": 768, + "moe_output_scale": 4.0, + "noisy_gate_policy": "RSample_before", + "norm_type": "layer_norm", + "num_attention_heads": 16, + "num_channels": 3, + "num_experts": 8, + "num_hidden_layers": 24, + "num_routed_experts": 4, + "num_shared_experts": 4, + "patch_size": 14, + "qk_normalization": false, + "qkv_bias": true, + "shared_expert_intermediate_size": 3072, + "use_bfloat16": true, + "use_flash_attn": true, + "use_moe": false, + "use_residual": true, + "use_rts": false, + "use_weighted_residual": false + } +} diff --git a/lisa-ivl3-2b_sm_vlorati_sr/ckpt_model/model.safetensors b/lisa-ivl3-2b_sm_vlorati_sr/ckpt_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5f8b817aff3ba605415a28a3e00de85a2c12f176 --- /dev/null +++ b/lisa-ivl3-2b_sm_vlorati_sr/ckpt_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34902e2e95a321038b6333c1231fcd6a149838d7f06c4dd29412825f7909faf3 +size 4211070232 diff --git a/lisa-ivl3-2b_sm_vlorati_sr/ckpt_model/training_args.bin b/lisa-ivl3-2b_sm_vlorati_sr/ckpt_model/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..7e059366adeba0155ba787a090960a5b1d08ccb1 --- /dev/null +++ b/lisa-ivl3-2b_sm_vlorati_sr/ckpt_model/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3dbaae23ae7fa337ae4b7b0b6f0e025025ad320b095d4525538f980443804ca7 +size 7352 diff --git a/lisa-ivl3-2b_sm_vlorati_sr/evaluation_metrics.json b/lisa-ivl3-2b_sm_vlorati_sr/evaluation_metrics.json new file mode 100644 index 0000000000000000000000000000000000000000..e638f7105d2f4a43774794db5724020ee25be557 --- /dev/null +++ b/lisa-ivl3-2b_sm_vlorati_sr/evaluation_metrics.json @@ -0,0 +1,116 @@ +[ + { + "val_dataset": "ReasonSeg|val", + "epoch": 1.0, + "eval_giou": 0.5471020936965942, + "eval_ciou": 0.602611780166626 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 2.0, + "eval_giou": 0.5672279596328735, + "eval_ciou": 0.6352493166923523 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 3.0, + "eval_giou": 0.5891823768615723, + "eval_ciou": 0.6845236420631409 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 4.0, + "eval_giou": 0.591259241104126, + "eval_ciou": 0.6588038206100464 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 5.0, + "eval_giou": 0.5976754426956177, + "eval_ciou": 0.6873629689216614 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 6.0, + "eval_giou": 0.6133790016174316, + "eval_ciou": 0.6923184990882874 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 7.0, + "eval_giou": 0.6181373000144958, + "eval_ciou": 0.701919674873352 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 8.0, + "eval_giou": 0.6046135425567627, + "eval_ciou": 0.6792587041854858 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 9.0, + "eval_giou": 0.6186716556549072, + "eval_ciou": 0.7080965638160706 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 10.0, + "eval_giou": 0.6166353821754456, + "eval_ciou": 0.6947184205055237 + }, + { + "val_dataset": "ReasonSeg|test", + "epoch": 10.0, + "eval_giou": 0.5926748514175415, + "eval_ciou": 0.641939640045166 + }, + { + "val_dataset": "refcoco|unc|val", + "epoch": 10.0, + "eval_giou": 0.7815108895301819, + "eval_ciou": 0.7867216467857361 + }, + { + "val_dataset": "refcoco|unc|testA", + "epoch": 10.0, + "eval_giou": 0.8038669228553772, + "eval_ciou": 0.8110463619232178 + }, + { + "val_dataset": "refcoco|unc|testB", + "epoch": 10.0, + "eval_giou": 0.7573512196540833, + "eval_ciou": 0.7584792375564575 + }, + { + "val_dataset": "refcoco+|unc|val", + "epoch": 10.0, + "eval_giou": 0.7327768802642822, + "eval_ciou": 0.7273551821708679 + }, + { + "val_dataset": "refcoco+|unc|testA", + "epoch": 10.0, + "eval_giou": 0.7717306613922119, + "eval_ciou": 0.7728754281997681 + }, + { + "val_dataset": "refcoco+|unc|testB", + "epoch": 10.0, + "eval_giou": 0.6806328892707825, + "eval_ciou": 0.6689378619194031 + }, + { + "val_dataset": "refcocog|umd|test", + "epoch": 10.0, + "eval_giou": 0.7487296462059021, + "eval_ciou": 0.7591226100921631 + }, + { + "val_dataset": "refcocog|umd|val", + "epoch": 10.0, + "eval_giou": 0.7449982166290283, + "eval_ciou": 0.7518823146820068 + } +] \ No newline at end of file diff --git a/lisa-ivl3-2b_sm_vlorati_sr/events.out.tfevents.1759353791.bask-pg0308u30a.238080.0 b/lisa-ivl3-2b_sm_vlorati_sr/events.out.tfevents.1759353791.bask-pg0308u30a.238080.0 new file mode 100644 index 0000000000000000000000000000000000000000..760e6b5674356cf3eb644c0f72082816c87fe319 --- /dev/null +++ b/lisa-ivl3-2b_sm_vlorati_sr/events.out.tfevents.1759353791.bask-pg0308u30a.238080.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01dc71d266e08eb0b05318c3118ed3c4542d6f9eb89952e0403d75f8b98f5d66 +size 8914 diff --git a/lisa-ivl3-2b_sm_vlorati_sr/events.out.tfevents.1759355273.bask-pg0308u30a.264397.0 b/lisa-ivl3-2b_sm_vlorati_sr/events.out.tfevents.1759355273.bask-pg0308u30a.264397.0 new file mode 100644 index 0000000000000000000000000000000000000000..5b329e212266ef8c299b4a306e0c453b9d90aff1 --- /dev/null +++ b/lisa-ivl3-2b_sm_vlorati_sr/events.out.tfevents.1759355273.bask-pg0308u30a.264397.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a035260af2d57d452a286a5c0a726e19e52cd1a4961e408e1c31b0ab584a0355 +size 212352 diff --git a/lisa-ivl3-2b_sm_vlorati_sr/runs/Oct01_22-23-09_bask-pg0308u30a/events.out.tfevents.1759353877.bask-pg0308u30a.238080.1 b/lisa-ivl3-2b_sm_vlorati_sr/runs/Oct01_22-23-09_bask-pg0308u30a/events.out.tfevents.1759353877.bask-pg0308u30a.238080.1 new file mode 100644 index 0000000000000000000000000000000000000000..7114a9bc36f5043579a4cef7248e9e21035052fe --- /dev/null +++ b/lisa-ivl3-2b_sm_vlorati_sr/runs/Oct01_22-23-09_bask-pg0308u30a/events.out.tfevents.1759353877.bask-pg0308u30a.238080.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ffa1eb460b950aa2c320b12b8162f55b6b1ad1ddb70ccecc0aab59b649d57383 +size 13722 diff --git a/lisa-ivl3-2b_sm_vlorati_sr/runs/Oct01_22-47-50_bask-pg0308u30a/events.out.tfevents.1759355341.bask-pg0308u30a.264397.1 b/lisa-ivl3-2b_sm_vlorati_sr/runs/Oct01_22-47-50_bask-pg0308u30a/events.out.tfevents.1759355341.bask-pg0308u30a.264397.1 new file mode 100644 index 0000000000000000000000000000000000000000..cf0e8ad4f4c14397c940ca1fdb08668e5078dc92 --- /dev/null +++ b/lisa-ivl3-2b_sm_vlorati_sr/runs/Oct01_22-47-50_bask-pg0308u30a/events.out.tfevents.1759355341.bask-pg0308u30a.264397.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a7806ea8532f00520c65b48bcb2139c25d07b50fea1e9e13b870973faa58613 +size 116394 diff --git a/lisa-ivl3-2b_sm_vlorati_sr/runs/Oct01_22-47-50_bask-pg0308u30a/events.out.tfevents.1759385210.bask-pg0308u30a.264397.2 b/lisa-ivl3-2b_sm_vlorati_sr/runs/Oct01_22-47-50_bask-pg0308u30a/events.out.tfevents.1759385210.bask-pg0308u30a.264397.2 new file mode 100644 index 0000000000000000000000000000000000000000..a1ef69163ffd7b984e8b9ae73a5bfd2b25b807ae --- /dev/null +++ b/lisa-ivl3-2b_sm_vlorati_sr/runs/Oct01_22-47-50_bask-pg0308u30a/events.out.tfevents.1759385210.bask-pg0308u30a.264397.2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a85a451d2bfb39bd1b7bdc8b4582a8151f5f42185eae975b6fd709c504fb4bf6 +size 1402 diff --git a/lisa-ivl3-2b_vlorati_sr_r64/ckpt_model/config.json b/lisa-ivl3-2b_vlorati_sr_r64/ckpt_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1aaa0efe9e975d906488d234e8f6ec309c18056c --- /dev/null +++ b/lisa-ivl3-2b_vlorati_sr_r64/ckpt_model/config.json @@ -0,0 +1,143 @@ +{ + "architectures": [ + "InternVL3Self" + ], + "auto_map": { + "AutoConfig": "configuration_internvl_chat.InternVLChatConfig", + "AutoModel": "modeling_internvl_chat.InternVLChatModel", + "AutoModelForCausalLM": "modeling_internvl_chat.InternVLChatModel" + }, + "downsample_ratio": 0.5, + "dtype": "bfloat16", + "dynamic_image_size": true, + "eos_token_id": 151645, + "force_image_size": 448, + "hidden_size": 1536, + "image_fold": null, + "llm_config": { + "_attn_implementation_autoset": true, + "_name_or_path": "./pretrained/Qwen2.5-32B-Instruct", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 151643, + "dtype": "bfloat16", + "eos_token_id": 151643, + "hidden_act": "silu", + "hidden_size": 1536, + "initializer_range": 0.02, + "intermediate_size": 8960, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 70, + "model_type": "qwen2", + "moe_config": null, + "num_attention_heads": 12, + "num_hidden_layers": 28, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 2.0, + "rope_type": "dynamic", + "type": "dynamic" + }, + "rope_theta": 1000000.0, + "sliding_window": null, + "use_bfloat16": true, + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 151676 + }, + "max_dynamic_patch": 12, + "min_dynamic_patch": 1, + "model_type": "internvl_chat", + "output_attentions": false, + "pad2square": false, + "pad_token_id": 151643, + "ps_version": "v2", + "select_layer": -1, + "system_message": null, + "template": "internvl2_5", + "tie_word_embeddings": false, + "transformers_version": null, + "use_backbone_lora": 0, + "use_llm_lora": 0, + "use_thumbnail": true, + "vision_config": { + "_attn_implementation_autoset": true, + "_name_or_path": "OpenGVLab/InternViT-6B-448px-V1-5", + "architectures": [ + "InternVisionModel" + ], + "attention_dropout": 0.0, + "auto_map": { + "AutoConfig": "configuration_intern_vit.InternVisionConfig", + "AutoModel": "modeling_intern_vit.InternVisionModel" + }, + "capacity_factor": 1.2, + "drop_path_rate": 0.1, + "dropout": 0.0, + "dtype": "bfloat16", + "eval_capacity_factor": 1.4, + "hidden_act": "gelu", + "hidden_size": 1024, + "image_size": 448, + "initializer_factor": 0.1, + "initializer_range": 1e-10, + "intermediate_size": 4096, + "laux_allreduce": "all_nodes", + "layer_norm_eps": 1e-06, + "model_type": "intern_vit_6b", + "moe_coeff_ratio": 0.5, + "moe_intermediate_size": 768, + "moe_output_scale": 4.0, + "noisy_gate_policy": "RSample_before", + "norm_type": "layer_norm", + "num_attention_heads": 16, + "num_channels": 3, + "num_experts": 8, + "num_hidden_layers": 24, + "num_routed_experts": 4, + "num_shared_experts": 4, + "patch_size": 14, + "qk_normalization": false, + "qkv_bias": true, + "shared_expert_intermediate_size": 3072, + "use_bfloat16": true, + "use_flash_attn": true, + "use_moe": false, + "use_residual": true, + "use_rts": false, + "use_weighted_residual": false + } +} diff --git a/lisa-ivl3-2b_vlorati_sr_r64/ckpt_model/model.safetensors b/lisa-ivl3-2b_vlorati_sr_r64/ckpt_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0ff1e5f477208f80014ac0a87f20a3ac255669f0 --- /dev/null +++ b/lisa-ivl3-2b_vlorati_sr_r64/ckpt_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7fe2ee0c65045b10593b38c84fd103ef4ed942de2de3443876790d10fd339d3c +size 4211070232 diff --git a/lisa-ivl3-2b_vlorati_sr_r64/ckpt_model/training_args.bin b/lisa-ivl3-2b_vlorati_sr_r64/ckpt_model/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..e43cd5ed20fdc112ba1c69a1b7275734c555980a --- /dev/null +++ b/lisa-ivl3-2b_vlorati_sr_r64/ckpt_model/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67672b068c6a274daa7538fabb713ef19013014ab2d74a2e5669f01f792f4cdb +size 7352 diff --git a/lisa-ivl3-2b_vlorati_sr_r64/evaluation_metrics.json b/lisa-ivl3-2b_vlorati_sr_r64/evaluation_metrics.json new file mode 100644 index 0000000000000000000000000000000000000000..ea855ec3fb8f73149e285e6d7a5c31db1ce6593c --- /dev/null +++ b/lisa-ivl3-2b_vlorati_sr_r64/evaluation_metrics.json @@ -0,0 +1,176 @@ +[ + { + "val_dataset": "ReasonSeg|val", + "epoch": 1.0, + "eval_giou": 0.5681295990943909, + "eval_ciou": 0.6790133118629456 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 2.0, + "eval_giou": 0.5785760283470154, + "eval_ciou": 0.6400821208953857 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 3.0, + "eval_giou": 0.5926495790481567, + "eval_ciou": 0.6812544465065002 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 4.0, + "eval_giou": 0.5939769148826599, + "eval_ciou": 0.7041034698486328 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 5.0, + "eval_giou": 0.5996113419532776, + "eval_ciou": 0.6990864276885986 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 6.0, + "eval_giou": 0.6085721850395203, + "eval_ciou": 0.6751731038093567 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 7.0, + "eval_giou": 0.6209370493888855, + "eval_ciou": 0.7165656089782715 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 8.0, + "eval_giou": 0.6109558939933777, + "eval_ciou": 0.734531819820404 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 9.0, + "eval_giou": 0.6121669411659241, + "eval_ciou": 0.7159414887428284 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 10.0, + "eval_giou": 0.6156916618347168, + "eval_ciou": 0.6529143452644348 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 11.0, + "eval_giou": 0.6364421844482422, + "eval_ciou": 0.7269287109375 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 12.0, + "eval_giou": 0.6232878565788269, + "eval_ciou": 0.6599587798118591 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 13.0, + "eval_giou": 0.6300168633460999, + "eval_ciou": 0.7147185206413269 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 14.0, + "eval_giou": 0.6153789758682251, + "eval_ciou": 0.6705023646354675 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 15.0, + "eval_giou": 0.641882598400116, + "eval_ciou": 0.7399327158927917 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 16.0, + "eval_giou": 0.6268424987792969, + "eval_ciou": 0.6960588693618774 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 17.0, + "eval_giou": 0.6319335699081421, + "eval_ciou": 0.6936759352684021 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 18.0, + "eval_giou": 0.6390872001647949, + "eval_ciou": 0.7181963324546814 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 19.0, + "eval_giou": 0.6307296752929688, + "eval_ciou": 0.7185564637184143 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 20.0, + "eval_giou": 0.6397485733032227, + "eval_ciou": 0.7129224538803101 + }, + { + "val_dataset": "ReasonSeg|test", + "epoch": 20.0, + "eval_giou": 0.6320482492446899, + "eval_ciou": 0.6539801359176636 + }, + { + "val_dataset": "refcoco|unc|val", + "epoch": 20.0, + "eval_giou": 0.8015521764755249, + "eval_ciou": 0.8043497204780579 + }, + { + "val_dataset": "refcoco|unc|testA", + "epoch": 20.0, + "eval_giou": 0.8224740624427795, + "eval_ciou": 0.8278002142906189 + }, + { + "val_dataset": "refcoco|unc|testB", + "epoch": 20.0, + "eval_giou": 0.7818536758422852, + "eval_ciou": 0.7846184968948364 + }, + { + "val_dataset": "refcoco+|unc|val", + "epoch": 20.0, + "eval_giou": 0.7620067000389099, + "eval_ciou": 0.7559397220611572 + }, + { + "val_dataset": "refcoco+|unc|testA", + "epoch": 20.0, + "eval_giou": 0.8001024723052979, + "eval_ciou": 0.800222635269165 + }, + { + "val_dataset": "refcoco+|unc|testB", + "epoch": 20.0, + "eval_giou": 0.7148984670639038, + "eval_ciou": 0.7048622965812683 + }, + { + "val_dataset": "refcocog|umd|test", + "epoch": 20.0, + "eval_giou": 0.7707029581069946, + "eval_ciou": 0.7785274982452393 + }, + { + "val_dataset": "refcocog|umd|val", + "epoch": 20.0, + "eval_giou": 0.7662516236305237, + "eval_ciou": 0.7695006728172302 + } +] \ No newline at end of file diff --git a/lisa-ivl3-2b_vlorati_sr_r64/events.out.tfevents.1759232925.bask-pg0308u25a.1785910.0 b/lisa-ivl3-2b_vlorati_sr_r64/events.out.tfevents.1759232925.bask-pg0308u25a.1785910.0 new file mode 100644 index 0000000000000000000000000000000000000000..754236fc5905fbe503f16e2568d9e4f3c33c10c1 --- /dev/null +++ b/lisa-ivl3-2b_vlorati_sr_r64/events.out.tfevents.1759232925.bask-pg0308u25a.1785910.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b98bcf8d94dfb84385762b6c741f78bde2591274a5e16db73aa752688806928 +size 419822 diff --git a/lisa-ivl3-2b_vlorati_sr_r64/runs/Sep30_12-48-38_bask-pg0308u25a/events.out.tfevents.1759233030.bask-pg0308u25a.1785910.1 b/lisa-ivl3-2b_vlorati_sr_r64/runs/Sep30_12-48-38_bask-pg0308u25a/events.out.tfevents.1759233030.bask-pg0308u25a.1785910.1 new file mode 100644 index 0000000000000000000000000000000000000000..0b80568398db8ef7ca0933338cb28c07b89878ad --- /dev/null +++ b/lisa-ivl3-2b_vlorati_sr_r64/runs/Sep30_12-48-38_bask-pg0308u25a/events.out.tfevents.1759233030.bask-pg0308u25a.1785910.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:333dc313adb1963ec6790239383155d7ac6aeeffdb4157c6e38458d7d736ca44 +size 223358 diff --git a/lisa-ivl3-2b_vlorati_sr_r64/runs/Sep30_12-48-38_bask-pg0308u25a/events.out.tfevents.1759284290.bask-pg0308u25a.1785910.2 b/lisa-ivl3-2b_vlorati_sr_r64/runs/Sep30_12-48-38_bask-pg0308u25a/events.out.tfevents.1759284290.bask-pg0308u25a.1785910.2 new file mode 100644 index 0000000000000000000000000000000000000000..eaa9dfa05de564558bae721c32a03b6a7e74a71d --- /dev/null +++ b/lisa-ivl3-2b_vlorati_sr_r64/runs/Sep30_12-48-38_bask-pg0308u25a/events.out.tfevents.1759284290.bask-pg0308u25a.1785910.2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86432995701004ace21a04dfc76277e5618830bed29bfa52ebe659eff40e0b52 +size 1402 diff --git a/lisa-ivl3-8b_aati_sr/ckpt_model/config.json b/lisa-ivl3-8b_aati_sr/ckpt_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..413e24ed9d7104d9c96eeb65b41ab698746e16f4 --- /dev/null +++ b/lisa-ivl3-8b_aati_sr/ckpt_model/config.json @@ -0,0 +1,143 @@ +{ + "architectures": [ + "InternVL3Self" + ], + "auto_map": { + "AutoConfig": "configuration_internvl_chat.InternVLChatConfig", + "AutoModel": "modeling_internvl_chat.InternVLChatModel", + "AutoModelForCausalLM": "modeling_internvl_chat.InternVLChatModel" + }, + "downsample_ratio": 0.5, + "dtype": "bfloat16", + "dynamic_image_size": true, + "eos_token_id": 151645, + "force_image_size": 448, + "hidden_size": 3584, + "image_fold": null, + "llm_config": { + "_attn_implementation_autoset": true, + "_name_or_path": "./pretrained/Qwen2.5-32B-Instruct", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 151643, + "dtype": "bfloat16", + "eos_token_id": 151643, + "hidden_act": "silu", + "hidden_size": 3584, + "initializer_range": 0.02, + "intermediate_size": 18944, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 70, + "model_type": "qwen2", + "moe_config": null, + "num_attention_heads": 28, + "num_hidden_layers": 28, + "num_key_value_heads": 4, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 2.0, + "rope_type": "dynamic", + "type": "dynamic" + }, + "rope_theta": 1000000.0, + "sliding_window": null, + "use_bfloat16": true, + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 151676 + }, + "max_dynamic_patch": 12, + "min_dynamic_patch": 1, + "model_type": "internvl_chat", + "output_attentions": false, + "pad2square": false, + "pad_token_id": 151643, + "ps_version": "v2", + "select_layer": -1, + "system_message": null, + "template": "internvl2_5", + "tie_word_embeddings": false, + "transformers_version": null, + "use_backbone_lora": 0, + "use_llm_lora": 0, + "use_thumbnail": true, + "vision_config": { + "_attn_implementation_autoset": true, + "_name_or_path": "OpenGVLab/InternViT-6B-448px-V1-5", + "architectures": [ + "InternVisionModel" + ], + "attention_dropout": 0.0, + "auto_map": { + "AutoConfig": "configuration_intern_vit.InternVisionConfig", + "AutoModel": "modeling_intern_vit.InternVisionModel" + }, + "capacity_factor": 1.2, + "drop_path_rate": 0.1, + "dropout": 0.0, + "dtype": "bfloat16", + "eval_capacity_factor": 1.4, + "hidden_act": "gelu", + "hidden_size": 1024, + "image_size": 448, + "initializer_factor": 0.1, + "initializer_range": 1e-10, + "intermediate_size": 4096, + "laux_allreduce": "all_nodes", + "layer_norm_eps": 1e-06, + "model_type": "intern_vit_6b", + "moe_coeff_ratio": 0.5, + "moe_intermediate_size": 768, + "moe_output_scale": 4.0, + "noisy_gate_policy": "RSample_before", + "norm_type": "layer_norm", + "num_attention_heads": 16, + "num_channels": 3, + "num_experts": 8, + "num_hidden_layers": 24, + "num_routed_experts": 4, + "num_shared_experts": 4, + "patch_size": 14, + "qk_normalization": false, + "qkv_bias": true, + "shared_expert_intermediate_size": 3072, + "use_bfloat16": true, + "use_flash_attn": true, + "use_moe": false, + "use_residual": true, + "use_rts": false, + "use_weighted_residual": false + } +} diff --git a/lisa-ivl3-8b_aati_sr/ckpt_model/model-00001-of-00004.safetensors b/lisa-ivl3-8b_aati_sr/ckpt_model/model-00001-of-00004.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..76c8a81816823a284b7753ab3fdfd2bd6704ea17 --- /dev/null +++ b/lisa-ivl3-8b_aati_sr/ckpt_model/model-00001-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e346886277fc763cef11ac9255ec91ad75a463e62d5d4236123f7a96a7979038 +size 4991138296 diff --git a/lisa-ivl3-8b_aati_sr/ckpt_model/model-00002-of-00004.safetensors b/lisa-ivl3-8b_aati_sr/ckpt_model/model-00002-of-00004.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b3eedc40ed7d201e23d96f2cd76893acac5137e9 --- /dev/null +++ b/lisa-ivl3-8b_aati_sr/ckpt_model/model-00002-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47278522b395821c0c1320eb48464659e4e2b5a4c889ccb551ef526e804951a5 +size 4958443072 diff --git a/lisa-ivl3-8b_aati_sr/ckpt_model/model-00003-of-00004.safetensors b/lisa-ivl3-8b_aati_sr/ckpt_model/model-00003-of-00004.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..38762d9025f0f8fcba154acab1f4b73b0981b738 --- /dev/null +++ b/lisa-ivl3-8b_aati_sr/ckpt_model/model-00003-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70b93796e430e08caa8c49e8905f8b7134d3fdcbf17694c1c84302b23dc234f4 +size 4796984024 diff --git a/lisa-ivl3-8b_aati_sr/ckpt_model/model-00004-of-00004.safetensors b/lisa-ivl3-8b_aati_sr/ckpt_model/model-00004-of-00004.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..81de865504a29f8e08f95b81c6c58d154f121508 --- /dev/null +++ b/lisa-ivl3-8b_aati_sr/ckpt_model/model-00004-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01b749810545fbf4cb31dba11a06abc3039ce513d1a226fef84718d4f1a12bcd +size 1322191472 diff --git a/lisa-ivl3-8b_aati_sr/ckpt_model/model.safetensors.index.json b/lisa-ivl3-8b_aati_sr/ckpt_model/model.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..c2e4fbe418b4133a57764ab7223751a047719d21 --- /dev/null +++ b/lisa-ivl3-8b_aati_sr/ckpt_model/model.safetensors.index.json @@ -0,0 +1,703 @@ +{ + "metadata": { + "total_parameters": 8034335744, + "total_size": 16068671488 + }, + "weight_map": { + "combine_decode_proj.0.weight": "model-00004-of-00004.safetensors", + "combine_decode_proj.1.bias": "model-00004-of-00004.safetensors", + "combine_decode_proj.1.weight": "model-00004-of-00004.safetensors", + "combine_decode_proj.3.bias": "model-00004-of-00004.safetensors", + "combine_decode_proj.3.weight": "model-00004-of-00004.safetensors", + "combine_seg_proj.0.bias": "model-00004-of-00004.safetensors", + "combine_seg_proj.0.weight": "model-00004-of-00004.safetensors", + "combine_seg_proj.2.bias": "model-00004-of-00004.safetensors", + "combine_seg_proj.2.weight": "model-00004-of-00004.safetensors", + "language_model.lm_head.weight": "model-00004-of-00004.safetensors", + "language_model.model.embed_tokens.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.0.self_attn.k_proj.bias": "model-00001-of-00004.safetensors", + "language_model.model.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.0.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.0.self_attn.q_proj.bias": "model-00001-of-00004.safetensors", + "language_model.model.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.0.self_attn.v_proj.bias": "model-00001-of-00004.safetensors", + "language_model.model.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.1.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.1.self_attn.k_proj.bias": "model-00001-of-00004.safetensors", + "language_model.model.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.1.self_attn.q_proj.bias": "model-00001-of-00004.safetensors", + "language_model.model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.1.self_attn.v_proj.bias": "model-00001-of-00004.safetensors", + "language_model.model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.10.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.10.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.10.self_attn.k_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.10.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.10.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.10.self_attn.q_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.10.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.10.self_attn.v_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.10.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.11.self_attn.k_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.11.self_attn.q_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.11.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.11.self_attn.v_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.11.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.12.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.12.self_attn.k_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.12.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.12.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.12.self_attn.q_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.12.self_attn.v_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.13.self_attn.k_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.13.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.13.self_attn.q_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.13.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.13.self_attn.v_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.13.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.14.self_attn.k_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.14.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.14.self_attn.q_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.14.self_attn.v_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.15.self_attn.k_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.15.self_attn.q_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.15.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.15.self_attn.v_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.16.input_layernorm.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.16.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.16.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.16.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.16.self_attn.k_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.16.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.16.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.16.self_attn.q_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.16.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.16.self_attn.v_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.16.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.17.input_layernorm.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.17.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.17.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.17.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.17.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.17.self_attn.k_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.17.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.17.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.17.self_attn.q_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.17.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.17.self_attn.v_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.17.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.18.input_layernorm.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.18.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.18.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.18.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.18.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.18.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.18.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.18.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.18.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.18.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.18.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.18.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.19.input_layernorm.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.19.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.19.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.19.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.19.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.19.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.19.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.19.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.19.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.19.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.19.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.19.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.2.self_attn.k_proj.bias": "model-00001-of-00004.safetensors", + "language_model.model.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.2.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.2.self_attn.q_proj.bias": "model-00001-of-00004.safetensors", + "language_model.model.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.2.self_attn.v_proj.bias": "model-00001-of-00004.safetensors", + "language_model.model.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.20.input_layernorm.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.20.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.20.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.20.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.20.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.20.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.20.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.20.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.20.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.20.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.20.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.20.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.21.input_layernorm.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.21.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.21.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.21.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.21.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.21.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.21.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.21.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.21.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.21.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.21.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.21.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.22.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.22.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.22.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.22.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.22.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.22.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.22.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.22.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.22.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.23.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.23.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.23.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.23.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.23.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.23.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.24.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.24.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.24.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.24.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.25.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.25.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.25.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.26.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.26.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.26.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.26.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.26.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.27.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.27.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.27.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.27.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.27.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.27.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.27.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.27.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.27.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.27.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.3.self_attn.k_proj.bias": "model-00001-of-00004.safetensors", + "language_model.model.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.3.self_attn.q_proj.bias": "model-00001-of-00004.safetensors", + "language_model.model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.3.self_attn.v_proj.bias": "model-00001-of-00004.safetensors", + "language_model.model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.4.self_attn.k_proj.bias": "model-00001-of-00004.safetensors", + "language_model.model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.4.self_attn.q_proj.bias": "model-00001-of-00004.safetensors", + "language_model.model.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.4.self_attn.v_proj.bias": "model-00001-of-00004.safetensors", + "language_model.model.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.5.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.5.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.5.self_attn.k_proj.bias": "model-00001-of-00004.safetensors", + "language_model.model.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.5.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.5.self_attn.q_proj.bias": "model-00001-of-00004.safetensors", + "language_model.model.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.5.self_attn.v_proj.bias": "model-00001-of-00004.safetensors", + "language_model.model.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.6.input_layernorm.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.6.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.6.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.6.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.6.self_attn.k_proj.bias": "model-00001-of-00004.safetensors", + "language_model.model.layers.6.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.6.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.6.self_attn.q_proj.bias": "model-00001-of-00004.safetensors", + "language_model.model.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.6.self_attn.v_proj.bias": "model-00001-of-00004.safetensors", + "language_model.model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.7.input_layernorm.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.7.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.7.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.7.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.7.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.7.self_attn.k_proj.bias": "model-00001-of-00004.safetensors", + "language_model.model.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.7.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.7.self_attn.q_proj.bias": "model-00001-of-00004.safetensors", + "language_model.model.layers.7.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.7.self_attn.v_proj.bias": "model-00001-of-00004.safetensors", + "language_model.model.layers.7.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.8.input_layernorm.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.8.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.8.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.8.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.8.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.8.self_attn.k_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.8.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.8.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.8.self_attn.q_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.8.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.8.self_attn.v_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.8.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.9.input_layernorm.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.9.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.9.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.9.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.9.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.9.self_attn.k_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.9.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.9.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.9.self_attn.q_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.9.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.9.self_attn.v_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.9.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.norm.weight": "model-00003-of-00004.safetensors", + "mlp1.0.bias": "model-00004-of-00004.safetensors", + "mlp1.0.weight": "model-00004-of-00004.safetensors", + "mlp1.1.bias": "model-00004-of-00004.safetensors", + "mlp1.1.weight": "model-00004-of-00004.safetensors", + "mlp1.3.bias": "model-00004-of-00004.safetensors", + "mlp1.3.weight": "model-00004-of-00004.safetensors", + "seg_img_embed.weight": "model-00004-of-00004.safetensors", + "vision_model.embeddings.class_embedding": "model-00001-of-00004.safetensors", + "vision_model.embeddings.patch_embedding.bias": "model-00001-of-00004.safetensors", + "vision_model.embeddings.patch_embedding.weight": "model-00001-of-00004.safetensors", + "vision_model.embeddings.position_embedding": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.0.attn.proj.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.0.attn.proj.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.0.attn.qkv.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.0.attn.qkv.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.0.ls1": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.0.ls2": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.0.mlp.fc1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.0.mlp.fc1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.0.mlp.fc2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.0.mlp.fc2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.0.norm1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.0.norm1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.0.norm2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.0.norm2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.1.attn.proj.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.1.attn.proj.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.1.attn.qkv.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.1.attn.qkv.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.1.ls1": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.1.ls2": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.1.mlp.fc1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.1.mlp.fc1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.1.mlp.fc2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.1.mlp.fc2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.1.norm1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.1.norm1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.1.norm2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.1.norm2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.10.attn.proj.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.10.attn.proj.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.10.attn.qkv.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.10.attn.qkv.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.10.ls1": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.10.ls2": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.10.mlp.fc1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.10.mlp.fc1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.10.mlp.fc2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.10.mlp.fc2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.10.norm1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.10.norm1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.10.norm2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.10.norm2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.11.attn.proj.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.11.attn.proj.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.11.attn.qkv.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.11.attn.qkv.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.11.ls1": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.11.ls2": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.11.mlp.fc1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.11.mlp.fc1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.11.mlp.fc2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.11.mlp.fc2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.11.norm1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.11.norm1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.11.norm2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.11.norm2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.12.attn.proj.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.12.attn.proj.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.12.attn.qkv.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.12.attn.qkv.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.12.ls1": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.12.ls2": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.12.mlp.fc1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.12.mlp.fc1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.12.mlp.fc2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.12.mlp.fc2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.12.norm1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.12.norm1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.12.norm2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.12.norm2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.13.attn.proj.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.13.attn.proj.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.13.attn.qkv.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.13.attn.qkv.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.13.ls1": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.13.ls2": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.13.mlp.fc1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.13.mlp.fc1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.13.mlp.fc2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.13.mlp.fc2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.13.norm1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.13.norm1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.13.norm2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.13.norm2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.14.attn.proj.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.14.attn.proj.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.14.attn.qkv.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.14.attn.qkv.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.14.ls1": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.14.ls2": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.14.mlp.fc1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.14.mlp.fc1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.14.mlp.fc2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.14.mlp.fc2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.14.norm1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.14.norm1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.14.norm2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.14.norm2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.15.attn.proj.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.15.attn.proj.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.15.attn.qkv.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.15.attn.qkv.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.15.ls1": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.15.ls2": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.15.mlp.fc1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.15.mlp.fc1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.15.mlp.fc2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.15.mlp.fc2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.15.norm1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.15.norm1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.15.norm2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.15.norm2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.16.attn.proj.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.16.attn.proj.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.16.attn.qkv.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.16.attn.qkv.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.16.ls1": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.16.ls2": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.16.mlp.fc1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.16.mlp.fc1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.16.mlp.fc2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.16.mlp.fc2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.16.norm1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.16.norm1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.16.norm2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.16.norm2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.17.attn.proj.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.17.attn.proj.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.17.attn.qkv.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.17.attn.qkv.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.17.ls1": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.17.ls2": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.17.mlp.fc1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.17.mlp.fc1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.17.mlp.fc2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.17.mlp.fc2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.17.norm1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.17.norm1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.17.norm2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.17.norm2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.18.attn.proj.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.18.attn.proj.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.18.attn.qkv.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.18.attn.qkv.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.18.ls1": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.18.ls2": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.18.mlp.fc1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.18.mlp.fc1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.18.mlp.fc2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.18.mlp.fc2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.18.norm1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.18.norm1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.18.norm2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.18.norm2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.19.attn.proj.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.19.attn.proj.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.19.attn.qkv.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.19.attn.qkv.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.19.ls1": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.19.ls2": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.19.mlp.fc1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.19.mlp.fc1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.19.mlp.fc2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.19.mlp.fc2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.19.norm1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.19.norm1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.19.norm2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.19.norm2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.2.attn.proj.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.2.attn.proj.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.2.attn.qkv.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.2.attn.qkv.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.2.ls1": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.2.ls2": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.2.mlp.fc1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.2.mlp.fc1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.2.mlp.fc2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.2.mlp.fc2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.2.norm1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.2.norm1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.2.norm2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.2.norm2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.20.attn.proj.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.20.attn.proj.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.20.attn.qkv.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.20.attn.qkv.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.20.ls1": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.20.ls2": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.20.mlp.fc1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.20.mlp.fc1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.20.mlp.fc2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.20.mlp.fc2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.20.norm1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.20.norm1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.20.norm2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.20.norm2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.21.attn.proj.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.21.attn.proj.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.21.attn.qkv.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.21.attn.qkv.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.21.ls1": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.21.ls2": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.21.mlp.fc1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.21.mlp.fc1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.21.mlp.fc2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.21.mlp.fc2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.21.norm1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.21.norm1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.21.norm2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.21.norm2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.22.attn.proj.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.22.attn.proj.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.22.attn.qkv.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.22.attn.qkv.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.22.ls1": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.22.ls2": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.22.mlp.fc1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.22.mlp.fc1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.22.mlp.fc2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.22.mlp.fc2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.22.norm1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.22.norm1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.22.norm2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.22.norm2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.23.attn.proj.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.23.attn.proj.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.23.attn.qkv.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.23.attn.qkv.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.23.ls1": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.23.ls2": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.23.mlp.fc1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.23.mlp.fc1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.23.mlp.fc2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.23.mlp.fc2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.23.norm1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.23.norm1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.23.norm2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.23.norm2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.3.attn.proj.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.3.attn.proj.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.3.attn.qkv.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.3.attn.qkv.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.3.ls1": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.3.ls2": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.3.mlp.fc1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.3.mlp.fc1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.3.mlp.fc2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.3.mlp.fc2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.3.norm1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.3.norm1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.3.norm2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.3.norm2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.4.attn.proj.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.4.attn.proj.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.4.attn.qkv.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.4.attn.qkv.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.4.ls1": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.4.ls2": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.4.mlp.fc1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.4.mlp.fc1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.4.mlp.fc2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.4.mlp.fc2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.4.norm1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.4.norm1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.4.norm2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.4.norm2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.5.attn.proj.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.5.attn.proj.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.5.attn.qkv.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.5.attn.qkv.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.5.ls1": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.5.ls2": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.5.mlp.fc1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.5.mlp.fc1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.5.mlp.fc2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.5.mlp.fc2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.5.norm1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.5.norm1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.5.norm2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.5.norm2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.6.attn.proj.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.6.attn.proj.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.6.attn.qkv.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.6.attn.qkv.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.6.ls1": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.6.ls2": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.6.mlp.fc1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.6.mlp.fc1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.6.mlp.fc2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.6.mlp.fc2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.6.norm1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.6.norm1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.6.norm2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.6.norm2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.7.attn.proj.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.7.attn.proj.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.7.attn.qkv.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.7.attn.qkv.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.7.ls1": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.7.ls2": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.7.mlp.fc1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.7.mlp.fc1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.7.mlp.fc2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.7.mlp.fc2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.7.norm1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.7.norm1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.7.norm2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.7.norm2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.8.attn.proj.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.8.attn.proj.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.8.attn.qkv.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.8.attn.qkv.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.8.ls1": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.8.ls2": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.8.mlp.fc1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.8.mlp.fc1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.8.mlp.fc2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.8.mlp.fc2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.8.norm1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.8.norm1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.8.norm2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.8.norm2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.9.attn.proj.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.9.attn.proj.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.9.attn.qkv.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.9.attn.qkv.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.9.ls1": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.9.ls2": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.9.mlp.fc1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.9.mlp.fc1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.9.mlp.fc2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.9.mlp.fc2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.9.norm1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.9.norm1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.9.norm2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.9.norm2.weight": "model-00001-of-00004.safetensors" + } +} diff --git a/lisa-ivl3-8b_aati_sr/ckpt_model/training_args.bin b/lisa-ivl3-8b_aati_sr/ckpt_model/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..d128cdb81076d3ebba0abf56b86bce13be0e3139 --- /dev/null +++ b/lisa-ivl3-8b_aati_sr/ckpt_model/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ecb0b65be7d72038f40048d1fb780cffe1db6f0172861bc588885e215d34553 +size 7416 diff --git a/lisa-ivl3-8b_aati_sr/evaluation_metrics.json b/lisa-ivl3-8b_aati_sr/evaluation_metrics.json new file mode 100644 index 0000000000000000000000000000000000000000..9388d14c3cdcf86197b6a5c0f13a61518c7c5d14 --- /dev/null +++ b/lisa-ivl3-8b_aati_sr/evaluation_metrics.json @@ -0,0 +1,176 @@ +[ + { + "val_dataset": "ReasonSeg|val", + "epoch": 1.0, + "eval_giou": 0.5199118852615356, + "eval_ciou": 0.617181658744812 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 2.0, + "eval_giou": 0.5419926643371582, + "eval_ciou": 0.6379032731056213 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 3.0, + "eval_giou": 0.5476846098899841, + "eval_ciou": 0.6413038372993469 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 4.0, + "eval_giou": 0.5690851807594299, + "eval_ciou": 0.6464181542396545 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 5.0, + "eval_giou": 0.5643561482429504, + "eval_ciou": 0.5863462090492249 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 6.0, + "eval_giou": 0.5711941123008728, + "eval_ciou": 0.5771334171295166 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 7.0, + "eval_giou": 0.6002376079559326, + "eval_ciou": 0.5256001949310303 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 8.0, + "eval_giou": 0.602410078048706, + "eval_ciou": 0.5920692682266235 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 9.0, + "eval_giou": 0.6075235605239868, + "eval_ciou": 0.5958225727081299 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 10.0, + "eval_giou": 0.6114643216133118, + "eval_ciou": 0.6215885281562805 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 11.0, + "eval_giou": 0.6205581426620483, + "eval_ciou": 0.6683638691902161 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 12.0, + "eval_giou": 0.6134063005447388, + "eval_ciou": 0.606198251247406 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 13.0, + "eval_giou": 0.6193637251853943, + "eval_ciou": 0.6233588457107544 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 14.0, + "eval_giou": 0.6374903321266174, + "eval_ciou": 0.6219833493232727 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 15.0, + "eval_giou": 0.631434977054596, + "eval_ciou": 0.6212102770805359 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 16.0, + "eval_giou": 0.6324134469032288, + "eval_ciou": 0.5702254772186279 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 17.0, + "eval_giou": 0.6409932971000671, + "eval_ciou": 0.6008056402206421 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 18.0, + "eval_giou": 0.6392917037010193, + "eval_ciou": 0.6379099488258362 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 19.0, + "eval_giou": 0.6427000164985657, + "eval_ciou": 0.6524733304977417 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 20.0, + "eval_giou": 0.6439707279205322, + "eval_ciou": 0.6526139974594116 + }, + { + "val_dataset": "ReasonSeg|test", + "epoch": 20.0, + "eval_giou": 0.6377514600753784, + "eval_ciou": 0.6415619254112244 + }, + { + "val_dataset": "refcoco|unc|val", + "epoch": 20.0, + "eval_giou": 0.7975160479545593, + "eval_ciou": 0.801155686378479 + }, + { + "val_dataset": "refcoco|unc|testA", + "epoch": 20.0, + "eval_giou": 0.8191339373588562, + "eval_ciou": 0.8281161785125732 + }, + { + "val_dataset": "refcoco|unc|testB", + "epoch": 20.0, + "eval_giou": 0.7825404405593872, + "eval_ciou": 0.7852137684822083 + }, + { + "val_dataset": "refcoco+|unc|val", + "epoch": 20.0, + "eval_giou": 0.7586740851402283, + "eval_ciou": 0.7505996823310852 + }, + { + "val_dataset": "refcoco+|unc|testA", + "epoch": 20.0, + "eval_giou": 0.7924565076828003, + "eval_ciou": 0.7947479486465454 + }, + { + "val_dataset": "refcoco+|unc|testB", + "epoch": 20.0, + "eval_giou": 0.7145842909812927, + "eval_ciou": 0.7026858329772949 + }, + { + "val_dataset": "refcocog|umd|test", + "epoch": 20.0, + "eval_giou": 0.7609391808509827, + "eval_ciou": 0.7710764408111572 + }, + { + "val_dataset": "refcocog|umd|val", + "epoch": 20.0, + "eval_giou": 0.7528159618377686, + "eval_ciou": 0.7546263337135315 + } +] \ No newline at end of file diff --git a/lisa-ivl3-8b_aati_sr/events.out.tfevents.1759177569.bask-pg0308u12a.1509572.0 b/lisa-ivl3-8b_aati_sr/events.out.tfevents.1759177569.bask-pg0308u12a.1509572.0 new file mode 100644 index 0000000000000000000000000000000000000000..fd716f94391f434736839833af4b9b7cb4add0e3 --- /dev/null +++ b/lisa-ivl3-8b_aati_sr/events.out.tfevents.1759177569.bask-pg0308u12a.1509572.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:820a0bcd9967a61ee22f16fe38ee23afdfed8bc336561920bf93f3c5412d2a6a +size 88 diff --git a/lisa-ivl3-8b_aati_sr/events.out.tfevents.1759177729.bask-pg0308u12a.1513743.0 b/lisa-ivl3-8b_aati_sr/events.out.tfevents.1759177729.bask-pg0308u12a.1513743.0 new file mode 100644 index 0000000000000000000000000000000000000000..0e8d1576b584becae358cd082171e8e6f6db0fe6 --- /dev/null +++ b/lisa-ivl3-8b_aati_sr/events.out.tfevents.1759177729.bask-pg0308u12a.1513743.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb395367fd9cfce0b4930e412dd19be64b57f808646e1c45b8423b01592c0ba0 +size 88 diff --git a/lisa-ivl3-8b_aati_sr/events.out.tfevents.1759307195.bask-pg0309u12a.335158.0 b/lisa-ivl3-8b_aati_sr/events.out.tfevents.1759307195.bask-pg0309u12a.335158.0 new file mode 100644 index 0000000000000000000000000000000000000000..a95230e0fb38a1ce7a2de0408610aae83908af79 --- /dev/null +++ b/lisa-ivl3-8b_aati_sr/events.out.tfevents.1759307195.bask-pg0309u12a.335158.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55c5eb0fc1febd92009fde13db001791ad08bcbaed1670bdf64ca277a6a25d03 +size 88 diff --git a/lisa-ivl3-8b_aati_sr/events.out.tfevents.1759307467.bask-pg0309u12a.341928.0 b/lisa-ivl3-8b_aati_sr/events.out.tfevents.1759307467.bask-pg0309u12a.341928.0 new file mode 100644 index 0000000000000000000000000000000000000000..e2faf73af9790202dc3460249792a972165f5597 --- /dev/null +++ b/lisa-ivl3-8b_aati_sr/events.out.tfevents.1759307467.bask-pg0309u12a.341928.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6aae4eff74f379fb08fb2f71e3318bec39c213b2160ff68b67815987b87ca30d +size 88 diff --git a/lisa-ivl3-8b_aati_sr/events.out.tfevents.1759307650.bask-pg0309u12a.348402.0 b/lisa-ivl3-8b_aati_sr/events.out.tfevents.1759307650.bask-pg0309u12a.348402.0 new file mode 100644 index 0000000000000000000000000000000000000000..a96e699b57ea2f9ba5f74bccb3012a66722ef71a --- /dev/null +++ b/lisa-ivl3-8b_aati_sr/events.out.tfevents.1759307650.bask-pg0309u12a.348402.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2561ec85c91acbd5055fc1db709a52637d9beb5ad037ffe8827e3d14fe79bc27 +size 88 diff --git a/lisa-ivl3-8b_aati_sr/events.out.tfevents.1759308109.bask-pg0309u12a.358561.0 b/lisa-ivl3-8b_aati_sr/events.out.tfevents.1759308109.bask-pg0309u12a.358561.0 new file mode 100644 index 0000000000000000000000000000000000000000..5fc2a10e098c3029d267993aba0d66f0aeada05b --- /dev/null +++ b/lisa-ivl3-8b_aati_sr/events.out.tfevents.1759308109.bask-pg0309u12a.358561.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff57698b9be780fb21f28128f99defa8867ccdcbda81c92f733702c7eca20a68 +size 88 diff --git a/lisa-ivl3-8b_aati_sr/events.out.tfevents.1759308396.bask-pg0309u12a.367605.0 b/lisa-ivl3-8b_aati_sr/events.out.tfevents.1759308396.bask-pg0309u12a.367605.0 new file mode 100644 index 0000000000000000000000000000000000000000..b81e69d6cfd4642910549f2beeaa63e40bcd79b1 --- /dev/null +++ b/lisa-ivl3-8b_aati_sr/events.out.tfevents.1759308396.bask-pg0309u12a.367605.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6f856dee66051057956d6b5a5fbee40acbcda8ea5c906b7552d2052474ca6a1 +size 2078 diff --git a/lisa-ivl3-8b_aati_sr/events.out.tfevents.1759310424.bask-pg0309u12a.407229.0 b/lisa-ivl3-8b_aati_sr/events.out.tfevents.1759310424.bask-pg0309u12a.407229.0 new file mode 100644 index 0000000000000000000000000000000000000000..2b29bbb2ccb54ecb35cf214e7f13bc9214659a31 --- /dev/null +++ b/lisa-ivl3-8b_aati_sr/events.out.tfevents.1759310424.bask-pg0309u12a.407229.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6622fbcc021467282973b966f4cae1646a472b9ea96ee838d9b348562dff093 +size 11749 diff --git a/lisa-ivl3-8b_aati_sr/events.out.tfevents.1759317025.bask-pg0309u12a.519924.0 b/lisa-ivl3-8b_aati_sr/events.out.tfevents.1759317025.bask-pg0309u12a.519924.0 new file mode 100644 index 0000000000000000000000000000000000000000..66544f32b19fac7a3891229b74eb7cc3f052a949 --- /dev/null +++ b/lisa-ivl3-8b_aati_sr/events.out.tfevents.1759317025.bask-pg0309u12a.519924.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a85edf0a127be913d596881af2bca2fb5fad00eeea973d028b4d45d0e16c5aa7 +size 88 diff --git a/lisa-ivl3-8b_aati_sr/events.out.tfevents.1759317206.bask-pg0309u12a.525606.0 b/lisa-ivl3-8b_aati_sr/events.out.tfevents.1759317206.bask-pg0309u12a.525606.0 new file mode 100644 index 0000000000000000000000000000000000000000..e5cedb5e58dc1fe162c3e5dbbfb723ca127cdebe --- /dev/null +++ b/lisa-ivl3-8b_aati_sr/events.out.tfevents.1759317206.bask-pg0309u12a.525606.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d4286db8c07a657853950fcacdcd3b6decee958a7a75ec0dbfa6cd9c4d1949e +size 419822 diff --git a/lisa-ivl3-8b_aati_sr/runs/Oct01_09-31-04_bask-pg0309u12a/events.out.tfevents.1759307573.bask-pg0309u12a.341928.1 b/lisa-ivl3-8b_aati_sr/runs/Oct01_09-31-04_bask-pg0309u12a/events.out.tfevents.1759307573.bask-pg0309u12a.341928.1 new file mode 100644 index 0000000000000000000000000000000000000000..ddcb56ee01160ecbe92be0fbbb4a07c8f41b1435 --- /dev/null +++ b/lisa-ivl3-8b_aati_sr/runs/Oct01_09-31-04_bask-pg0309u12a/events.out.tfevents.1759307573.bask-pg0309u12a.341928.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd9fe4dabf83e27c4e2ecf9f7db57baf10d3b4d92ae444338c8b9b567349eda4 +size 9116 diff --git a/lisa-ivl3-8b_aati_sr/runs/Oct01_09-34-07_bask-pg0309u12a/events.out.tfevents.1759307735.bask-pg0309u12a.348402.1 b/lisa-ivl3-8b_aati_sr/runs/Oct01_09-34-07_bask-pg0309u12a/events.out.tfevents.1759307735.bask-pg0309u12a.348402.1 new file mode 100644 index 0000000000000000000000000000000000000000..66fcd4810c5bab6541ea658575a920838f6ab062 --- /dev/null +++ b/lisa-ivl3-8b_aati_sr/runs/Oct01_09-34-07_bask-pg0309u12a/events.out.tfevents.1759307735.bask-pg0309u12a.348402.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d5e8b2261eae862a41d4faefd5b834cdd6976e32d3b46ab11d8de73e2efe6e4 +size 9116 diff --git a/lisa-ivl3-8b_aati_sr/runs/Oct01_09-41-45_bask-pg0309u12a/events.out.tfevents.1759308174.bask-pg0309u12a.358561.1 b/lisa-ivl3-8b_aati_sr/runs/Oct01_09-41-45_bask-pg0309u12a/events.out.tfevents.1759308174.bask-pg0309u12a.358561.1 new file mode 100644 index 0000000000000000000000000000000000000000..234884d8d7556f716cb3b4dcffc7050c5598b1d7 --- /dev/null +++ b/lisa-ivl3-8b_aati_sr/runs/Oct01_09-41-45_bask-pg0309u12a/events.out.tfevents.1759308174.bask-pg0309u12a.358561.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eab3fc95b2df39b273ad961e462cc4304fa74cc45c857d9e958db29faa09d4e7 +size 9116 diff --git a/lisa-ivl3-8b_aati_sr/runs/Oct01_09-46-32_bask-pg0309u12a/events.out.tfevents.1759308472.bask-pg0309u12a.367605.1 b/lisa-ivl3-8b_aati_sr/runs/Oct01_09-46-32_bask-pg0309u12a/events.out.tfevents.1759308472.bask-pg0309u12a.367605.1 new file mode 100644 index 0000000000000000000000000000000000000000..1d178d47f22fab7b0d4665f875aec2d11a6675eb --- /dev/null +++ b/lisa-ivl3-8b_aati_sr/runs/Oct01_09-46-32_bask-pg0309u12a/events.out.tfevents.1759308472.bask-pg0309u12a.367605.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac526f30468260795419484b65f808bd8a6999124cf486af396917062d8c1b4f +size 10213 diff --git a/lisa-ivl3-8b_aati_sr/runs/Oct01_10-20-20_bask-pg0309u12a/events.out.tfevents.1759310506.bask-pg0309u12a.407229.1 b/lisa-ivl3-8b_aati_sr/runs/Oct01_10-20-20_bask-pg0309u12a/events.out.tfevents.1759310506.bask-pg0309u12a.407229.1 new file mode 100644 index 0000000000000000000000000000000000000000..fdbd5ba53e82478f2bd9c388f478c7cd774ccf04 --- /dev/null +++ b/lisa-ivl3-8b_aati_sr/runs/Oct01_10-20-20_bask-pg0309u12a/events.out.tfevents.1759310506.bask-pg0309u12a.407229.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:afac69089d4f162de6b1d4e8069ba7fad1e902b005bec3d57bea8ae941e17706 +size 15249 diff --git a/lisa-ivl3-8b_aati_sr/runs/Oct01_12-10-21_bask-pg0309u12a/events.out.tfevents.1759317108.bask-pg0309u12a.519924.1 b/lisa-ivl3-8b_aati_sr/runs/Oct01_12-10-21_bask-pg0309u12a/events.out.tfevents.1759317108.bask-pg0309u12a.519924.1 new file mode 100644 index 0000000000000000000000000000000000000000..cf107e578e0c9b23b68a593b20efd3885659a0f4 --- /dev/null +++ b/lisa-ivl3-8b_aati_sr/runs/Oct01_12-10-21_bask-pg0309u12a/events.out.tfevents.1759317108.bask-pg0309u12a.519924.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f029affbafecd9c5540b6ad0eb9eebd9ed919d5713f537aaf19a968cfcef1fed +size 9116 diff --git a/lisa-ivl3-8b_aati_sr/runs/Oct01_12-13-23_bask-pg0309u12a/events.out.tfevents.1759317286.bask-pg0309u12a.525606.1 b/lisa-ivl3-8b_aati_sr/runs/Oct01_12-13-23_bask-pg0309u12a/events.out.tfevents.1759317286.bask-pg0309u12a.525606.1 new file mode 100644 index 0000000000000000000000000000000000000000..7974f1da8ebddfa470375c382e6a1aaf5e66d913 --- /dev/null +++ b/lisa-ivl3-8b_aati_sr/runs/Oct01_12-13-23_bask-pg0309u12a/events.out.tfevents.1759317286.bask-pg0309u12a.525606.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f706a14a3ddad17888a01b3701be2e06c3282ad884c0e7418ba20fa220fc38fd +size 223404 diff --git a/lisa-ivl3-8b_aati_sr/runs/Oct01_12-13-23_bask-pg0309u12a/events.out.tfevents.1759515261.bask-pg0309u12a.525606.2 b/lisa-ivl3-8b_aati_sr/runs/Oct01_12-13-23_bask-pg0309u12a/events.out.tfevents.1759515261.bask-pg0309u12a.525606.2 new file mode 100644 index 0000000000000000000000000000000000000000..b42845800375af6f2ea0e23ae0e440d9dc96c771 --- /dev/null +++ b/lisa-ivl3-8b_aati_sr/runs/Oct01_12-13-23_bask-pg0309u12a/events.out.tfevents.1759515261.bask-pg0309u12a.525606.2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b48d016f9ad440c76a5e77b45afc389bb2ee0ebcd6cf44d9ebe98cafae0fe604 +size 1402 diff --git a/lisa-ivl3-8b_vlorati_sr_r64/ckpt_model/config.json b/lisa-ivl3-8b_vlorati_sr_r64/ckpt_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..413e24ed9d7104d9c96eeb65b41ab698746e16f4 --- /dev/null +++ b/lisa-ivl3-8b_vlorati_sr_r64/ckpt_model/config.json @@ -0,0 +1,143 @@ +{ + "architectures": [ + "InternVL3Self" + ], + "auto_map": { + "AutoConfig": "configuration_internvl_chat.InternVLChatConfig", + "AutoModel": "modeling_internvl_chat.InternVLChatModel", + "AutoModelForCausalLM": "modeling_internvl_chat.InternVLChatModel" + }, + "downsample_ratio": 0.5, + "dtype": "bfloat16", + "dynamic_image_size": true, + "eos_token_id": 151645, + "force_image_size": 448, + "hidden_size": 3584, + "image_fold": null, + "llm_config": { + "_attn_implementation_autoset": true, + "_name_or_path": "./pretrained/Qwen2.5-32B-Instruct", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 151643, + "dtype": "bfloat16", + "eos_token_id": 151643, + "hidden_act": "silu", + "hidden_size": 3584, + "initializer_range": 0.02, + "intermediate_size": 18944, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 70, + "model_type": "qwen2", + "moe_config": null, + "num_attention_heads": 28, + "num_hidden_layers": 28, + "num_key_value_heads": 4, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 2.0, + "rope_type": "dynamic", + "type": "dynamic" + }, + "rope_theta": 1000000.0, + "sliding_window": null, + "use_bfloat16": true, + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 151676 + }, + "max_dynamic_patch": 12, + "min_dynamic_patch": 1, + "model_type": "internvl_chat", + "output_attentions": false, + "pad2square": false, + "pad_token_id": 151643, + "ps_version": "v2", + "select_layer": -1, + "system_message": null, + "template": "internvl2_5", + "tie_word_embeddings": false, + "transformers_version": null, + "use_backbone_lora": 0, + "use_llm_lora": 0, + "use_thumbnail": true, + "vision_config": { + "_attn_implementation_autoset": true, + "_name_or_path": "OpenGVLab/InternViT-6B-448px-V1-5", + "architectures": [ + "InternVisionModel" + ], + "attention_dropout": 0.0, + "auto_map": { + "AutoConfig": "configuration_intern_vit.InternVisionConfig", + "AutoModel": "modeling_intern_vit.InternVisionModel" + }, + "capacity_factor": 1.2, + "drop_path_rate": 0.1, + "dropout": 0.0, + "dtype": "bfloat16", + "eval_capacity_factor": 1.4, + "hidden_act": "gelu", + "hidden_size": 1024, + "image_size": 448, + "initializer_factor": 0.1, + "initializer_range": 1e-10, + "intermediate_size": 4096, + "laux_allreduce": "all_nodes", + "layer_norm_eps": 1e-06, + "model_type": "intern_vit_6b", + "moe_coeff_ratio": 0.5, + "moe_intermediate_size": 768, + "moe_output_scale": 4.0, + "noisy_gate_policy": "RSample_before", + "norm_type": "layer_norm", + "num_attention_heads": 16, + "num_channels": 3, + "num_experts": 8, + "num_hidden_layers": 24, + "num_routed_experts": 4, + "num_shared_experts": 4, + "patch_size": 14, + "qk_normalization": false, + "qkv_bias": true, + "shared_expert_intermediate_size": 3072, + "use_bfloat16": true, + "use_flash_attn": true, + "use_moe": false, + "use_residual": true, + "use_rts": false, + "use_weighted_residual": false + } +} diff --git a/lisa-ivl3-8b_vlorati_sr_r64/ckpt_model/model-00001-of-00004.safetensors b/lisa-ivl3-8b_vlorati_sr_r64/ckpt_model/model-00001-of-00004.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..80405b849077fa865e6a7f2730a4ec6dbb987d22 --- /dev/null +++ b/lisa-ivl3-8b_vlorati_sr_r64/ckpt_model/model-00001-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c604cd9876a8650b469f4ca163304a9cd94e604372f1b1b1093762f2b259438 +size 4991138296 diff --git a/lisa-ivl3-8b_vlorati_sr_r64/ckpt_model/model-00002-of-00004.safetensors b/lisa-ivl3-8b_vlorati_sr_r64/ckpt_model/model-00002-of-00004.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..787238179ff84c5e08888d8d0ca275b97f441618 --- /dev/null +++ b/lisa-ivl3-8b_vlorati_sr_r64/ckpt_model/model-00002-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:487b017a6b386955bf611a3b4e058dffd9a63b897f21439472a4d6c7caef4c29 +size 4958443072 diff --git a/lisa-ivl3-8b_vlorati_sr_r64/ckpt_model/model-00003-of-00004.safetensors b/lisa-ivl3-8b_vlorati_sr_r64/ckpt_model/model-00003-of-00004.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dac93c233b03cd3d2fc286de3c70ac83e7ab1583 --- /dev/null +++ b/lisa-ivl3-8b_vlorati_sr_r64/ckpt_model/model-00003-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f02b16a4164bf1d095a3ff9547a1dc0e8929cdf0a9ff68ad1c8ae9dfc150dea +size 4796984024 diff --git a/lisa-ivl3-8b_vlorati_sr_r64/ckpt_model/model-00004-of-00004.safetensors b/lisa-ivl3-8b_vlorati_sr_r64/ckpt_model/model-00004-of-00004.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..39d9e30cc930f7c74732630b1e9329e56541fc96 --- /dev/null +++ b/lisa-ivl3-8b_vlorati_sr_r64/ckpt_model/model-00004-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c4f916953767fc870aec19e75c27e7ae54455462ac10e8d76a20a4bd9b412bd +size 1322191472 diff --git a/lisa-ivl3-8b_vlorati_sr_r64/ckpt_model/model.safetensors.index.json b/lisa-ivl3-8b_vlorati_sr_r64/ckpt_model/model.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..c2e4fbe418b4133a57764ab7223751a047719d21 --- /dev/null +++ b/lisa-ivl3-8b_vlorati_sr_r64/ckpt_model/model.safetensors.index.json @@ -0,0 +1,703 @@ +{ + "metadata": { + "total_parameters": 8034335744, + "total_size": 16068671488 + }, + "weight_map": { + "combine_decode_proj.0.weight": "model-00004-of-00004.safetensors", + "combine_decode_proj.1.bias": "model-00004-of-00004.safetensors", + "combine_decode_proj.1.weight": "model-00004-of-00004.safetensors", + "combine_decode_proj.3.bias": "model-00004-of-00004.safetensors", + "combine_decode_proj.3.weight": "model-00004-of-00004.safetensors", + "combine_seg_proj.0.bias": "model-00004-of-00004.safetensors", + "combine_seg_proj.0.weight": "model-00004-of-00004.safetensors", + "combine_seg_proj.2.bias": "model-00004-of-00004.safetensors", + "combine_seg_proj.2.weight": "model-00004-of-00004.safetensors", + "language_model.lm_head.weight": "model-00004-of-00004.safetensors", + "language_model.model.embed_tokens.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.0.self_attn.k_proj.bias": "model-00001-of-00004.safetensors", + "language_model.model.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.0.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.0.self_attn.q_proj.bias": "model-00001-of-00004.safetensors", + "language_model.model.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.0.self_attn.v_proj.bias": "model-00001-of-00004.safetensors", + "language_model.model.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.1.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.1.self_attn.k_proj.bias": "model-00001-of-00004.safetensors", + "language_model.model.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.1.self_attn.q_proj.bias": "model-00001-of-00004.safetensors", + "language_model.model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.1.self_attn.v_proj.bias": "model-00001-of-00004.safetensors", + "language_model.model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.10.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.10.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.10.self_attn.k_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.10.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.10.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.10.self_attn.q_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.10.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.10.self_attn.v_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.10.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.11.self_attn.k_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.11.self_attn.q_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.11.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.11.self_attn.v_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.11.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.12.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.12.self_attn.k_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.12.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.12.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.12.self_attn.q_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.12.self_attn.v_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.13.self_attn.k_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.13.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.13.self_attn.q_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.13.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.13.self_attn.v_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.13.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.14.self_attn.k_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.14.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.14.self_attn.q_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.14.self_attn.v_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.15.self_attn.k_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.15.self_attn.q_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.15.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.15.self_attn.v_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.16.input_layernorm.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.16.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.16.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.16.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.16.self_attn.k_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.16.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.16.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.16.self_attn.q_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.16.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.16.self_attn.v_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.16.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.17.input_layernorm.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.17.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.17.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.17.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.17.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.17.self_attn.k_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.17.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.17.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.17.self_attn.q_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.17.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.17.self_attn.v_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.17.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.18.input_layernorm.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.18.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.18.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.18.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.18.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.18.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.18.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.18.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.18.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.18.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.18.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.18.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.19.input_layernorm.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.19.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.19.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.19.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.19.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.19.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.19.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.19.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.19.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.19.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.19.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.19.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.2.self_attn.k_proj.bias": "model-00001-of-00004.safetensors", + "language_model.model.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.2.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.2.self_attn.q_proj.bias": "model-00001-of-00004.safetensors", + "language_model.model.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.2.self_attn.v_proj.bias": "model-00001-of-00004.safetensors", + "language_model.model.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.20.input_layernorm.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.20.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.20.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.20.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.20.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.20.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.20.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.20.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.20.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.20.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.20.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.20.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.21.input_layernorm.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.21.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.21.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.21.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.21.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.21.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.21.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.21.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.21.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.21.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.21.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.21.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.22.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.22.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.22.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.22.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.22.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.22.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.22.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.22.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.22.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.23.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.23.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.23.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.23.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.23.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.23.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.24.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.24.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.24.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.24.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.25.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.25.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.25.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.26.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.26.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.26.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.26.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.26.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.27.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.27.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.27.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.27.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.27.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.27.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.27.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.27.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.27.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.27.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.3.self_attn.k_proj.bias": "model-00001-of-00004.safetensors", + "language_model.model.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.3.self_attn.q_proj.bias": "model-00001-of-00004.safetensors", + "language_model.model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.3.self_attn.v_proj.bias": "model-00001-of-00004.safetensors", + "language_model.model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.4.self_attn.k_proj.bias": "model-00001-of-00004.safetensors", + "language_model.model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.4.self_attn.q_proj.bias": "model-00001-of-00004.safetensors", + "language_model.model.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.4.self_attn.v_proj.bias": "model-00001-of-00004.safetensors", + "language_model.model.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.5.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.5.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.5.self_attn.k_proj.bias": "model-00001-of-00004.safetensors", + "language_model.model.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.5.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.5.self_attn.q_proj.bias": "model-00001-of-00004.safetensors", + "language_model.model.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.5.self_attn.v_proj.bias": "model-00001-of-00004.safetensors", + "language_model.model.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.6.input_layernorm.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.6.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.6.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.6.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.6.self_attn.k_proj.bias": "model-00001-of-00004.safetensors", + "language_model.model.layers.6.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.6.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.6.self_attn.q_proj.bias": "model-00001-of-00004.safetensors", + "language_model.model.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.6.self_attn.v_proj.bias": "model-00001-of-00004.safetensors", + "language_model.model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.7.input_layernorm.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.7.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.7.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.7.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.7.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.7.self_attn.k_proj.bias": "model-00001-of-00004.safetensors", + "language_model.model.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.7.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.7.self_attn.q_proj.bias": "model-00001-of-00004.safetensors", + "language_model.model.layers.7.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.7.self_attn.v_proj.bias": "model-00001-of-00004.safetensors", + "language_model.model.layers.7.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.8.input_layernorm.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.8.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.8.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.8.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.8.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.8.self_attn.k_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.8.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.8.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.8.self_attn.q_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.8.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.8.self_attn.v_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.8.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.9.input_layernorm.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.9.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.9.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.9.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.9.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.9.self_attn.k_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.9.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.9.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.9.self_attn.q_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.9.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.9.self_attn.v_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.9.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.norm.weight": "model-00003-of-00004.safetensors", + "mlp1.0.bias": "model-00004-of-00004.safetensors", + "mlp1.0.weight": "model-00004-of-00004.safetensors", + "mlp1.1.bias": "model-00004-of-00004.safetensors", + "mlp1.1.weight": "model-00004-of-00004.safetensors", + "mlp1.3.bias": "model-00004-of-00004.safetensors", + "mlp1.3.weight": "model-00004-of-00004.safetensors", + "seg_img_embed.weight": "model-00004-of-00004.safetensors", + "vision_model.embeddings.class_embedding": "model-00001-of-00004.safetensors", + "vision_model.embeddings.patch_embedding.bias": "model-00001-of-00004.safetensors", + "vision_model.embeddings.patch_embedding.weight": "model-00001-of-00004.safetensors", + "vision_model.embeddings.position_embedding": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.0.attn.proj.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.0.attn.proj.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.0.attn.qkv.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.0.attn.qkv.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.0.ls1": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.0.ls2": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.0.mlp.fc1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.0.mlp.fc1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.0.mlp.fc2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.0.mlp.fc2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.0.norm1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.0.norm1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.0.norm2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.0.norm2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.1.attn.proj.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.1.attn.proj.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.1.attn.qkv.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.1.attn.qkv.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.1.ls1": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.1.ls2": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.1.mlp.fc1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.1.mlp.fc1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.1.mlp.fc2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.1.mlp.fc2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.1.norm1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.1.norm1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.1.norm2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.1.norm2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.10.attn.proj.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.10.attn.proj.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.10.attn.qkv.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.10.attn.qkv.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.10.ls1": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.10.ls2": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.10.mlp.fc1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.10.mlp.fc1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.10.mlp.fc2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.10.mlp.fc2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.10.norm1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.10.norm1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.10.norm2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.10.norm2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.11.attn.proj.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.11.attn.proj.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.11.attn.qkv.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.11.attn.qkv.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.11.ls1": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.11.ls2": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.11.mlp.fc1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.11.mlp.fc1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.11.mlp.fc2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.11.mlp.fc2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.11.norm1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.11.norm1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.11.norm2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.11.norm2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.12.attn.proj.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.12.attn.proj.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.12.attn.qkv.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.12.attn.qkv.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.12.ls1": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.12.ls2": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.12.mlp.fc1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.12.mlp.fc1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.12.mlp.fc2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.12.mlp.fc2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.12.norm1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.12.norm1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.12.norm2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.12.norm2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.13.attn.proj.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.13.attn.proj.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.13.attn.qkv.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.13.attn.qkv.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.13.ls1": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.13.ls2": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.13.mlp.fc1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.13.mlp.fc1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.13.mlp.fc2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.13.mlp.fc2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.13.norm1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.13.norm1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.13.norm2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.13.norm2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.14.attn.proj.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.14.attn.proj.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.14.attn.qkv.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.14.attn.qkv.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.14.ls1": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.14.ls2": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.14.mlp.fc1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.14.mlp.fc1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.14.mlp.fc2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.14.mlp.fc2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.14.norm1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.14.norm1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.14.norm2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.14.norm2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.15.attn.proj.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.15.attn.proj.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.15.attn.qkv.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.15.attn.qkv.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.15.ls1": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.15.ls2": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.15.mlp.fc1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.15.mlp.fc1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.15.mlp.fc2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.15.mlp.fc2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.15.norm1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.15.norm1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.15.norm2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.15.norm2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.16.attn.proj.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.16.attn.proj.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.16.attn.qkv.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.16.attn.qkv.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.16.ls1": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.16.ls2": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.16.mlp.fc1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.16.mlp.fc1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.16.mlp.fc2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.16.mlp.fc2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.16.norm1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.16.norm1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.16.norm2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.16.norm2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.17.attn.proj.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.17.attn.proj.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.17.attn.qkv.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.17.attn.qkv.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.17.ls1": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.17.ls2": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.17.mlp.fc1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.17.mlp.fc1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.17.mlp.fc2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.17.mlp.fc2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.17.norm1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.17.norm1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.17.norm2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.17.norm2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.18.attn.proj.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.18.attn.proj.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.18.attn.qkv.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.18.attn.qkv.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.18.ls1": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.18.ls2": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.18.mlp.fc1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.18.mlp.fc1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.18.mlp.fc2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.18.mlp.fc2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.18.norm1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.18.norm1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.18.norm2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.18.norm2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.19.attn.proj.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.19.attn.proj.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.19.attn.qkv.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.19.attn.qkv.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.19.ls1": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.19.ls2": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.19.mlp.fc1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.19.mlp.fc1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.19.mlp.fc2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.19.mlp.fc2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.19.norm1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.19.norm1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.19.norm2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.19.norm2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.2.attn.proj.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.2.attn.proj.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.2.attn.qkv.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.2.attn.qkv.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.2.ls1": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.2.ls2": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.2.mlp.fc1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.2.mlp.fc1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.2.mlp.fc2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.2.mlp.fc2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.2.norm1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.2.norm1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.2.norm2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.2.norm2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.20.attn.proj.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.20.attn.proj.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.20.attn.qkv.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.20.attn.qkv.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.20.ls1": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.20.ls2": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.20.mlp.fc1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.20.mlp.fc1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.20.mlp.fc2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.20.mlp.fc2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.20.norm1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.20.norm1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.20.norm2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.20.norm2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.21.attn.proj.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.21.attn.proj.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.21.attn.qkv.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.21.attn.qkv.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.21.ls1": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.21.ls2": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.21.mlp.fc1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.21.mlp.fc1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.21.mlp.fc2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.21.mlp.fc2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.21.norm1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.21.norm1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.21.norm2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.21.norm2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.22.attn.proj.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.22.attn.proj.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.22.attn.qkv.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.22.attn.qkv.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.22.ls1": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.22.ls2": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.22.mlp.fc1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.22.mlp.fc1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.22.mlp.fc2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.22.mlp.fc2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.22.norm1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.22.norm1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.22.norm2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.22.norm2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.23.attn.proj.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.23.attn.proj.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.23.attn.qkv.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.23.attn.qkv.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.23.ls1": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.23.ls2": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.23.mlp.fc1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.23.mlp.fc1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.23.mlp.fc2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.23.mlp.fc2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.23.norm1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.23.norm1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.23.norm2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.23.norm2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.3.attn.proj.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.3.attn.proj.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.3.attn.qkv.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.3.attn.qkv.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.3.ls1": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.3.ls2": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.3.mlp.fc1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.3.mlp.fc1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.3.mlp.fc2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.3.mlp.fc2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.3.norm1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.3.norm1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.3.norm2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.3.norm2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.4.attn.proj.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.4.attn.proj.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.4.attn.qkv.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.4.attn.qkv.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.4.ls1": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.4.ls2": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.4.mlp.fc1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.4.mlp.fc1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.4.mlp.fc2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.4.mlp.fc2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.4.norm1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.4.norm1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.4.norm2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.4.norm2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.5.attn.proj.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.5.attn.proj.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.5.attn.qkv.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.5.attn.qkv.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.5.ls1": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.5.ls2": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.5.mlp.fc1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.5.mlp.fc1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.5.mlp.fc2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.5.mlp.fc2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.5.norm1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.5.norm1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.5.norm2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.5.norm2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.6.attn.proj.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.6.attn.proj.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.6.attn.qkv.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.6.attn.qkv.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.6.ls1": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.6.ls2": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.6.mlp.fc1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.6.mlp.fc1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.6.mlp.fc2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.6.mlp.fc2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.6.norm1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.6.norm1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.6.norm2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.6.norm2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.7.attn.proj.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.7.attn.proj.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.7.attn.qkv.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.7.attn.qkv.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.7.ls1": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.7.ls2": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.7.mlp.fc1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.7.mlp.fc1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.7.mlp.fc2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.7.mlp.fc2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.7.norm1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.7.norm1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.7.norm2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.7.norm2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.8.attn.proj.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.8.attn.proj.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.8.attn.qkv.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.8.attn.qkv.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.8.ls1": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.8.ls2": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.8.mlp.fc1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.8.mlp.fc1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.8.mlp.fc2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.8.mlp.fc2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.8.norm1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.8.norm1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.8.norm2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.8.norm2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.9.attn.proj.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.9.attn.proj.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.9.attn.qkv.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.9.attn.qkv.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.9.ls1": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.9.ls2": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.9.mlp.fc1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.9.mlp.fc1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.9.mlp.fc2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.9.mlp.fc2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.9.norm1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.9.norm1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.9.norm2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.9.norm2.weight": "model-00001-of-00004.safetensors" + } +} diff --git a/lisa-ivl3-8b_vlorati_sr_r64/ckpt_model/training_args.bin b/lisa-ivl3-8b_vlorati_sr_r64/ckpt_model/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..3ab03e9d4de4b3a95fdcca54021d3d50d69cdc6f --- /dev/null +++ b/lisa-ivl3-8b_vlorati_sr_r64/ckpt_model/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29801d06c2395aeeb95b66eae1c3bf3124c010404abfcc83081c5acd3114a639 +size 7416 diff --git a/lisa-ivl3-8b_vlorati_sr_r64/evaluation_metrics.json b/lisa-ivl3-8b_vlorati_sr_r64/evaluation_metrics.json new file mode 100644 index 0000000000000000000000000000000000000000..c79de8525c12f672fbe261875fe34ad4116cc3f3 --- /dev/null +++ b/lisa-ivl3-8b_vlorati_sr_r64/evaluation_metrics.json @@ -0,0 +1,176 @@ +[ + { + "val_dataset": "ReasonSeg|val", + "epoch": 1.0, + "eval_giou": 0.5757561326026917, + "eval_ciou": 0.6217124462127686 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 2.0, + "eval_giou": 0.5711309909820557, + "eval_ciou": 0.6146857738494873 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 3.0, + "eval_giou": 0.5915148258209229, + "eval_ciou": 0.634421169757843 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 4.0, + "eval_giou": 0.5830483436584473, + "eval_ciou": 0.6237179636955261 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 5.0, + "eval_giou": 0.5811771154403687, + "eval_ciou": 0.6212621927261353 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 6.0, + "eval_giou": 0.5837242603302002, + "eval_ciou": 0.6290962100028992 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 7.0, + "eval_giou": 0.61127108335495, + "eval_ciou": 0.6514936089515686 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 8.0, + "eval_giou": 0.6012965440750122, + "eval_ciou": 0.6660168766975403 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 9.0, + "eval_giou": 0.6138907670974731, + "eval_ciou": 0.680842936038971 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 10.0, + "eval_giou": 0.630628764629364, + "eval_ciou": 0.6663742661476135 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 11.0, + "eval_giou": 0.6275925040245056, + "eval_ciou": 0.6626681089401245 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 12.0, + "eval_giou": 0.6259500980377197, + "eval_ciou": 0.6327635645866394 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 13.0, + "eval_giou": 0.6334192156791687, + "eval_ciou": 0.6701973676681519 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 14.0, + "eval_giou": 0.6200255155563354, + "eval_ciou": 0.664580225944519 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 15.0, + "eval_giou": 0.6424046754837036, + "eval_ciou": 0.6866087913513184 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 16.0, + "eval_giou": 0.6398801207542419, + "eval_ciou": 0.6765209436416626 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 17.0, + "eval_giou": 0.6477933526039124, + "eval_ciou": 0.689948320388794 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 18.0, + "eval_giou": 0.6539167165756226, + "eval_ciou": 0.6800609230995178 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 19.0, + "eval_giou": 0.6593981385231018, + "eval_ciou": 0.691003143787384 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 20.0, + "eval_giou": 0.6600930094718933, + "eval_ciou": 0.6901832222938538 + }, + { + "val_dataset": "ReasonSeg|test", + "epoch": 20.0, + "eval_giou": 0.6614076495170593, + "eval_ciou": 0.6761821508407593 + }, + { + "val_dataset": "refcoco|unc|val", + "epoch": 20.0, + "eval_giou": 0.8036565184593201, + "eval_ciou": 0.8097155690193176 + }, + { + "val_dataset": "refcoco|unc|testA", + "epoch": 20.0, + "eval_giou": 0.8203951716423035, + "eval_ciou": 0.8275377750396729 + }, + { + "val_dataset": "refcoco|unc|testB", + "epoch": 20.0, + "eval_giou": 0.7823006510734558, + "eval_ciou": 0.7853012084960938 + }, + { + "val_dataset": "refcoco+|unc|val", + "epoch": 20.0, + "eval_giou": 0.765501081943512, + "eval_ciou": 0.7622714042663574 + }, + { + "val_dataset": "refcoco+|unc|testA", + "epoch": 20.0, + "eval_giou": 0.7975759506225586, + "eval_ciou": 0.8018394708633423 + }, + { + "val_dataset": "refcoco+|unc|testB", + "epoch": 20.0, + "eval_giou": 0.7153090834617615, + "eval_ciou": 0.7034051418304443 + }, + { + "val_dataset": "refcocog|umd|test", + "epoch": 20.0, + "eval_giou": 0.7664403915405273, + "eval_ciou": 0.7779760956764221 + }, + { + "val_dataset": "refcocog|umd|val", + "epoch": 20.0, + "eval_giou": 0.7656736969947815, + "eval_ciou": 0.7743217945098877 + } +] \ No newline at end of file diff --git a/lisa-ivl3-8b_vlorati_sr_r64/events.out.tfevents.1759177949.bask-pg0308u12a.1518632.0 b/lisa-ivl3-8b_vlorati_sr_r64/events.out.tfevents.1759177949.bask-pg0308u12a.1518632.0 new file mode 100644 index 0000000000000000000000000000000000000000..bca8eb1411e36554fcf0c5b6dd963ccb9d1c6035 --- /dev/null +++ b/lisa-ivl3-8b_vlorati_sr_r64/events.out.tfevents.1759177949.bask-pg0308u12a.1518632.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f705904bd68e69a48150727ec5efeb6b5f0aef6500d2a38c75aca15f128c6f5 +size 88 diff --git a/lisa-ivl3-8b_vlorati_sr_r64/events.out.tfevents.1759178645.bask-pg0308u12a.1530740.0 b/lisa-ivl3-8b_vlorati_sr_r64/events.out.tfevents.1759178645.bask-pg0308u12a.1530740.0 new file mode 100644 index 0000000000000000000000000000000000000000..ef2a2156a6760cf85158b31bcc94c9eec7a2d8d4 --- /dev/null +++ b/lisa-ivl3-8b_vlorati_sr_r64/events.out.tfevents.1759178645.bask-pg0308u12a.1530740.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bcfb6faac30b18991bfda554b1025b4fb8e8debd1fdcc6d1907294241599ab31 +size 21561 diff --git a/lisa-ivl3-8b_vlorati_sr_r64/events.out.tfevents.1759185031.bask-pg0308u12a.1636216.0 b/lisa-ivl3-8b_vlorati_sr_r64/events.out.tfevents.1759185031.bask-pg0308u12a.1636216.0 new file mode 100644 index 0000000000000000000000000000000000000000..1e29c213ce683de63da2e4e3f6e2cd211cecf8a1 --- /dev/null +++ b/lisa-ivl3-8b_vlorati_sr_r64/events.out.tfevents.1759185031.bask-pg0308u12a.1636216.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2094f8a123e911515e9e69473c08804787d1028232298ab41a0aafe9d7a2de5 +size 399159 diff --git a/lisa-ivl3-8b_vlorati_sr_r64/runs/Sep29_21-44-02_bask-pg0308u12a/events.out.tfevents.1759178786.bask-pg0308u12a.1530740.1 b/lisa-ivl3-8b_vlorati_sr_r64/runs/Sep29_21-44-02_bask-pg0308u12a/events.out.tfevents.1759178786.bask-pg0308u12a.1530740.1 new file mode 100644 index 0000000000000000000000000000000000000000..fb8e09450a14344eaf313b29969d39beb1559356 --- /dev/null +++ b/lisa-ivl3-8b_vlorati_sr_r64/runs/Sep29_21-44-02_bask-pg0308u12a/events.out.tfevents.1759178786.bask-pg0308u12a.1530740.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5be593690a17945858ce2141e0c150031e77bd1a532d656c548f87eb9f6e3740 +size 20203 diff --git a/lisa-ivl3-8b_vlorati_sr_r64/runs/Sep29_23-30-26_bask-pg0308u12a/events.out.tfevents.1759185226.bask-pg0308u12a.1636216.1 b/lisa-ivl3-8b_vlorati_sr_r64/runs/Sep29_23-30-26_bask-pg0308u12a/events.out.tfevents.1759185226.bask-pg0308u12a.1636216.1 new file mode 100644 index 0000000000000000000000000000000000000000..4c5516d2145fd365fca48d64c70cc7e927acd2e5 --- /dev/null +++ b/lisa-ivl3-8b_vlorati_sr_r64/runs/Sep29_23-30-26_bask-pg0308u12a/events.out.tfevents.1759185226.bask-pg0308u12a.1636216.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f62b30a2820abd22bd19a8f516ad2a6f5c4c1295b94e23424174458a9a0b513 +size 212773 diff --git a/lisa-ivl3-8b_vlorati_sr_r64/runs/Sep29_23-30-26_bask-pg0308u12a/events.out.tfevents.1759319144.bask-pg0308u12a.1636216.2 b/lisa-ivl3-8b_vlorati_sr_r64/runs/Sep29_23-30-26_bask-pg0308u12a/events.out.tfevents.1759319144.bask-pg0308u12a.1636216.2 new file mode 100644 index 0000000000000000000000000000000000000000..eeafe9ec86c72efe0f5f028c2401bb7d29a7fa34 --- /dev/null +++ b/lisa-ivl3-8b_vlorati_sr_r64/runs/Sep29_23-30-26_bask-pg0308u12a/events.out.tfevents.1759319144.bask-pg0308u12a.1636216.2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d8406e7c644b324662ab38938c8af575ec9d03a96d990a28e8beea1c6a87f45 +size 1402