diff --git a/ivl3-1b_ss2_2_ce_aa_sr4_cbs/ckpt_model/config.json b/ivl3-1b_ss2_2_ce_aa_sr4_cbs/ckpt_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..2262984b0234dc4abdbc35e360554b911a83f32a --- /dev/null +++ b/ivl3-1b_ss2_2_ce_aa_sr4_cbs/ckpt_model/config.json @@ -0,0 +1,139 @@ +{ + "architectures": [ + "InternVL3Self" + ], + "auto_map": { + "AutoConfig": "configuration_internvl_chat.InternVLChatConfig", + "AutoModel": "modeling_internvl_chat.InternVLChatModel", + "AutoModelForCausalLM": "modeling_internvl_chat.InternVLChatModel" + }, + "downsample_ratio": 0.5, + "dtype": "bfloat16", + "dynamic_image_size": true, + "eos_token_id": 151645, + "force_image_size": 448, + "hidden_size": 896, + "image_fold": null, + "llm_config": { + "_attn_implementation_autoset": true, + "_name_or_path": "./pretrained/Qwen2.5-32B-Instruct", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 151643, + "dtype": "bfloat16", + "eos_token_id": 151643, + "hidden_act": "silu", + "hidden_size": 896, + "initializer_range": 0.02, + "intermediate_size": 4864, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 70, + "model_type": "qwen2", + "moe_config": null, + "num_attention_heads": 14, + "num_hidden_layers": 24, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 2.0, + "rope_type": "dynamic", + "type": "dynamic" + }, + "rope_theta": 1000000.0, + "sliding_window": null, + "use_bfloat16": true, + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 151676 + }, + "max_dynamic_patch": 12, + "min_dynamic_patch": 1, + "model_type": "internvl_chat", + "output_attentions": false, + "pad2square": false, + "pad_token_id": 151643, + "ps_version": "v2", + "select_layer": -1, + "system_message": null, + "template": "internvl2_5", + "tie_word_embeddings": false, + "transformers_version": null, + "use_backbone_lora": 0, + "use_llm_lora": 0, + "use_thumbnail": true, + "vision_config": { + "_attn_implementation_autoset": true, + "_name_or_path": "OpenGVLab/InternViT-6B-448px-V1-5", + "architectures": [ + "InternVisionModel" + ], + "attention_dropout": 0.0, + "auto_map": { + "AutoConfig": "configuration_intern_vit.InternVisionConfig", + "AutoModel": "modeling_intern_vit.InternVisionModel" + }, + "capacity_factor": 1.2, + "drop_path_rate": 0.1, + "dropout": 0.0, + "dtype": "bfloat16", + "eval_capacity_factor": 1.4, + "hidden_act": "gelu", + "hidden_size": 1024, + "image_size": 448, + "initializer_factor": 0.1, + "initializer_range": 1e-10, + "intermediate_size": 4096, + "laux_allreduce": "all_nodes", + "layer_norm_eps": 1e-06, + "model_type": "intern_vit_6b", + "moe_coeff_ratio": 0.5, + "moe_intermediate_size": 768, + "moe_output_scale": 4.0, + "noisy_gate_policy": "RSample_before", + "norm_type": "layer_norm", + "num_attention_heads": 16, + "num_channels": 3, + "num_experts": 8, + "num_hidden_layers": 24, + "num_routed_experts": 4, + "num_shared_experts": 4, + "patch_size": 14, + "qk_normalization": false, + "qkv_bias": true, + "shared_expert_intermediate_size": 3072, + "use_bfloat16": true, + "use_flash_attn": true, + "use_moe": false, + "use_residual": true, + "use_rts": false, + "use_weighted_residual": false + } +} diff --git a/ivl3-1b_ss2_2_ce_aa_sr4_cbs/ckpt_model/model.safetensors b/ivl3-1b_ss2_2_ce_aa_sr4_cbs/ckpt_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..827d9842d6b6c9b24bead26d076fa121e50e5f86 --- /dev/null +++ b/ivl3-1b_ss2_2_ce_aa_sr4_cbs/ckpt_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fbe4ec6a49e0b6bc8a746e8f97b9a142098004142e18058d1d0920b1c311a4b4 +size 1895760944 diff --git a/ivl3-1b_ss2_2_ce_aa_sr4_cbs/ckpt_model/training_args.bin b/ivl3-1b_ss2_2_ce_aa_sr4_cbs/ckpt_model/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..68d092b0f9d22562940a14c199dcd8bb485fb6fb --- /dev/null +++ b/ivl3-1b_ss2_2_ce_aa_sr4_cbs/ckpt_model/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c69c399d40df85749505a203a51b3b6a47ec624ca8587a3db35106d69490369 +size 7352 diff --git a/ivl3-1b_ss2_2_ce_aa_sr4_cbs/evaluation_metrics.json b/ivl3-1b_ss2_2_ce_aa_sr4_cbs/evaluation_metrics.json new file mode 100644 index 0000000000000000000000000000000000000000..77e41a7c974e79d1a0fc732e4e38caafb7d3b1ad --- /dev/null +++ b/ivl3-1b_ss2_2_ce_aa_sr4_cbs/evaluation_metrics.json @@ -0,0 +1,314 @@ +[ + { + "val_dataset": "ReasonSeg|val", + "epoch": 1.0, + "eval_giou": 0.46611911058425903, + "eval_ciou": 0.502875030040741 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 2.0, + "eval_giou": 0.45392024517059326, + "eval_ciou": 0.5104899406433105 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 3.0, + "eval_giou": 0.4961601495742798, + "eval_ciou": 0.5527771711349487 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 4.0, + "eval_giou": 0.4867956042289734, + "eval_ciou": 0.5469930171966553 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 5.0, + "eval_giou": 0.5023592114448547, + "eval_ciou": 0.5746171474456787 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 6.0, + "eval_giou": 0.5149329900741577, + "eval_ciou": 0.5788349509239197 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 7.0, + "eval_giou": 0.4842008650302887, + "eval_ciou": 0.5401598215103149 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 8.0, + "eval_giou": 0.4861740469932556, + "eval_ciou": 0.509650468826294 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 9.0, + "eval_giou": 0.5150892734527588, + "eval_ciou": 0.5765101909637451 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 10.0, + "eval_giou": 0.5048616528511047, + "eval_ciou": 0.5617921948432922 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 11.0, + "eval_giou": 0.49304988980293274, + "eval_ciou": 0.5348318219184875 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 12.0, + "eval_giou": 0.4974077343940735, + "eval_ciou": 0.5312325954437256 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 13.0, + "eval_giou": 0.49648475646972656, + "eval_ciou": 0.5170189142227173 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 14.0, + "eval_giou": 0.48747989535331726, + "eval_ciou": 0.5097679495811462 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 15.0, + "eval_giou": 0.4950149953365326, + "eval_ciou": 0.5840950012207031 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 16.0, + "eval_giou": 0.49667176604270935, + "eval_ciou": 0.5483035445213318 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 17.0, + "eval_giou": 0.5069117546081543, + "eval_ciou": 0.5292925238609314 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 18.0, + "eval_giou": 0.5001476407051086, + "eval_ciou": 0.5825715065002441 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 19.0, + "eval_giou": 0.5190213322639465, + "eval_ciou": 0.5663264393806458 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 20.0, + "eval_giou": 0.5078052282333374, + "eval_ciou": 0.550085723400116 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 21.0, + "eval_giou": 0.5005829334259033, + "eval_ciou": 0.5272146463394165 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 22.0, + "eval_giou": 0.5325579643249512, + "eval_ciou": 0.5967816114425659 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 23.0, + "eval_giou": 0.5019457340240479, + "eval_ciou": 0.5450933575630188 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 24.0, + "eval_giou": 0.5054593086242676, + "eval_ciou": 0.5789456367492676 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 25.0, + "eval_giou": 0.5179678201675415, + "eval_ciou": 0.5849955677986145 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 26.0, + "eval_giou": 0.49324876070022583, + "eval_ciou": 0.5512343645095825 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 27.0, + "eval_giou": 0.526308536529541, + "eval_ciou": 0.5917784571647644 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 28.0, + "eval_giou": 0.5196185111999512, + "eval_ciou": 0.5830491781234741 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 29.0, + "eval_giou": 0.5260405540466309, + "eval_ciou": 0.5873146057128906 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 30.0, + "eval_giou": 0.5176364779472351, + "eval_ciou": 0.5502902865409851 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 31.0, + "eval_giou": 0.5242363810539246, + "eval_ciou": 0.5449221134185791 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 32.0, + "eval_giou": 0.5220826864242554, + "eval_ciou": 0.548717737197876 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 33.0, + "eval_giou": 0.532138466835022, + "eval_ciou": 0.5798225402832031 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 34.0, + "eval_giou": 0.5219024419784546, + "eval_ciou": 0.5670239925384521 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 35.0, + "eval_giou": 0.5276386141777039, + "eval_ciou": 0.5819328427314758 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 36.0, + "eval_giou": 0.5373537540435791, + "eval_ciou": 0.5725668668746948 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 37.0, + "eval_giou": 0.5370974540710449, + "eval_ciou": 0.5747125744819641 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 38.0, + "eval_giou": 0.534446120262146, + "eval_ciou": 0.5705200433731079 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 39.0, + "eval_giou": 0.5382332801818848, + "eval_ciou": 0.5789334177970886 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 40.0, + "eval_giou": 0.5384411811828613, + "eval_ciou": 0.5742336511611938 + }, + { + "val_dataset": "ReasonSeg|test", + "epoch": 40.0, + "eval_giou": 0.5279056429862976, + "eval_ciou": 0.5487434267997742 + }, + { + "val_dataset": "refcoco|unc|val", + "epoch": 40.0, + "eval_giou": 0.8133376836776733, + "eval_ciou": 0.8157490491867065 + }, + { + "val_dataset": "refcoco|unc|testA", + "epoch": 40.0, + "eval_giou": 0.8291460871696472, + "eval_ciou": 0.8322128653526306 + }, + { + "val_dataset": "refcoco|unc|testB", + "epoch": 40.0, + "eval_giou": 0.7959555387496948, + "eval_ciou": 0.7960028052330017 + }, + { + "val_dataset": "refcoco+|unc|val", + "epoch": 40.0, + "eval_giou": 0.7633941173553467, + "eval_ciou": 0.7513891458511353 + }, + { + "val_dataset": "refcoco+|unc|testA", + "epoch": 40.0, + "eval_giou": 0.8041790723800659, + "eval_ciou": 0.7988309264183044 + }, + { + "val_dataset": "refcoco+|unc|testB", + "epoch": 40.0, + "eval_giou": 0.7283210754394531, + "eval_ciou": 0.7128502130508423 + }, + { + "val_dataset": "refcocog|umd|test", + "epoch": 40.0, + "eval_giou": 0.7768380641937256, + "eval_ciou": 0.7853620052337646 + }, + { + "val_dataset": "refcocog|umd|val", + "epoch": 40.0, + "eval_giou": 0.772633969783783, + "eval_ciou": 0.7714908719062805 + }, + { + "val_dataset": "grefcoco|unc|val", + "epoch": 40.0, + "eval_giou": 0.33939608931541443, + "eval_ciou": 0.389335036277771 + }, + { + "val_dataset": "grefcoco|unc|testA", + "epoch": 40.0, + "eval_giou": 0.48885226249694824, + "eval_ciou": 0.5180455446243286 + }, + { + "val_dataset": "grefcoco|unc|testB", + "epoch": 40.0, + "eval_giou": 0.42087891697883606, + "eval_ciou": 0.45711269974708557 + } +] \ No newline at end of file diff --git a/ivl3-1b_ss2_2_ce_aa_sr4_cbs/events.out.tfevents.1759861301.bask-pg0309u03a.2090474.0 b/ivl3-1b_ss2_2_ce_aa_sr4_cbs/events.out.tfevents.1759861301.bask-pg0309u03a.2090474.0 new file mode 100644 index 0000000000000000000000000000000000000000..a2161c327c8e42c1326dcc85f7b5db6207f0ca89 --- /dev/null +++ b/ivl3-1b_ss2_2_ce_aa_sr4_cbs/events.out.tfevents.1759861301.bask-pg0309u03a.2090474.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bcc1ce8b056a496430e9b93fc159c074fcc2897c8d3ffa4595ca9a42b640bcf2 +size 838934 diff --git a/ivl3-1b_ss2_2_ce_aa_sr4_cbs/runs/Oct07_19-21-38_bask-pg0309u03a/events.out.tfevents.1759861364.bask-pg0309u03a.2090474.1 b/ivl3-1b_ss2_2_ce_aa_sr4_cbs/runs/Oct07_19-21-38_bask-pg0309u03a/events.out.tfevents.1759861364.bask-pg0309u03a.2090474.1 new file mode 100644 index 0000000000000000000000000000000000000000..09a4802746922861c660ba482f5baa7e1828bbec --- /dev/null +++ b/ivl3-1b_ss2_2_ce_aa_sr4_cbs/runs/Oct07_19-21-38_bask-pg0309u03a/events.out.tfevents.1759861364.bask-pg0309u03a.2090474.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1aac295c5e250979475524c9c31a0d9338b8acbd61cfbc94c6765763ddf61bd9 +size 438663 diff --git a/ivl3-1b_ss2_2_ce_aa_sr4_cbs/runs/Oct07_19-21-38_bask-pg0309u03a/events.out.tfevents.1759976906.bask-pg0309u03a.2090474.2 b/ivl3-1b_ss2_2_ce_aa_sr4_cbs/runs/Oct07_19-21-38_bask-pg0309u03a/events.out.tfevents.1759976906.bask-pg0309u03a.2090474.2 new file mode 100644 index 0000000000000000000000000000000000000000..e6949cae604ede93139b890a1e68e3202062ed76 --- /dev/null +++ b/ivl3-1b_ss2_2_ce_aa_sr4_cbs/runs/Oct07_19-21-38_bask-pg0309u03a/events.out.tfevents.1759976906.bask-pg0309u03a.2090474.2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f5f9fde132a38c1d35c3ca50825c862adb145378bf3c2fe9c0a241496964c2b +size 1876 diff --git a/ivl3-2b_ss2_2_aa_sr4_cbs/ckpt_model/config.json b/ivl3-2b_ss2_2_aa_sr4_cbs/ckpt_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1aaa0efe9e975d906488d234e8f6ec309c18056c --- /dev/null +++ b/ivl3-2b_ss2_2_aa_sr4_cbs/ckpt_model/config.json @@ -0,0 +1,143 @@ +{ + "architectures": [ + "InternVL3Self" + ], + "auto_map": { + "AutoConfig": "configuration_internvl_chat.InternVLChatConfig", + "AutoModel": "modeling_internvl_chat.InternVLChatModel", + "AutoModelForCausalLM": "modeling_internvl_chat.InternVLChatModel" + }, + "downsample_ratio": 0.5, + "dtype": "bfloat16", + "dynamic_image_size": true, + "eos_token_id": 151645, + "force_image_size": 448, + "hidden_size": 1536, + "image_fold": null, + "llm_config": { + "_attn_implementation_autoset": true, + "_name_or_path": "./pretrained/Qwen2.5-32B-Instruct", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 151643, + "dtype": "bfloat16", + "eos_token_id": 151643, + "hidden_act": "silu", + "hidden_size": 1536, + "initializer_range": 0.02, + "intermediate_size": 8960, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 70, + "model_type": "qwen2", + "moe_config": null, + "num_attention_heads": 12, + "num_hidden_layers": 28, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 2.0, + "rope_type": "dynamic", + "type": "dynamic" + }, + "rope_theta": 1000000.0, + "sliding_window": null, + "use_bfloat16": true, + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 151676 + }, + "max_dynamic_patch": 12, + "min_dynamic_patch": 1, + "model_type": "internvl_chat", + "output_attentions": false, + "pad2square": false, + "pad_token_id": 151643, + "ps_version": "v2", + "select_layer": -1, + "system_message": null, + "template": "internvl2_5", + "tie_word_embeddings": false, + "transformers_version": null, + "use_backbone_lora": 0, + "use_llm_lora": 0, + "use_thumbnail": true, + "vision_config": { + "_attn_implementation_autoset": true, + "_name_or_path": "OpenGVLab/InternViT-6B-448px-V1-5", + "architectures": [ + "InternVisionModel" + ], + "attention_dropout": 0.0, + "auto_map": { + "AutoConfig": "configuration_intern_vit.InternVisionConfig", + "AutoModel": "modeling_intern_vit.InternVisionModel" + }, + "capacity_factor": 1.2, + "drop_path_rate": 0.1, + "dropout": 0.0, + "dtype": "bfloat16", + "eval_capacity_factor": 1.4, + "hidden_act": "gelu", + "hidden_size": 1024, + "image_size": 448, + "initializer_factor": 0.1, + "initializer_range": 1e-10, + "intermediate_size": 4096, + "laux_allreduce": "all_nodes", + "layer_norm_eps": 1e-06, + "model_type": "intern_vit_6b", + "moe_coeff_ratio": 0.5, + "moe_intermediate_size": 768, + "moe_output_scale": 4.0, + "noisy_gate_policy": "RSample_before", + "norm_type": "layer_norm", + "num_attention_heads": 16, + "num_channels": 3, + "num_experts": 8, + "num_hidden_layers": 24, + "num_routed_experts": 4, + "num_shared_experts": 4, + "patch_size": 14, + "qk_normalization": false, + "qkv_bias": true, + "shared_expert_intermediate_size": 3072, + "use_bfloat16": true, + "use_flash_attn": true, + "use_moe": false, + "use_residual": true, + "use_rts": false, + "use_weighted_residual": false + } +} diff --git a/ivl3-2b_ss2_2_aa_sr4_cbs/ckpt_model/model.safetensors b/ivl3-2b_ss2_2_aa_sr4_cbs/ckpt_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8915ef464e4b938b4cb7b549ee1c195d52e4bc4d --- /dev/null +++ b/ivl3-2b_ss2_2_aa_sr4_cbs/ckpt_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86bb78ca606874d74621c7804c52b7f099d1e33cc07b872caa1ded9beeed24b6 +size 4234672656 diff --git a/ivl3-2b_ss2_2_aa_sr4_cbs/ckpt_model/training_args.bin b/ivl3-2b_ss2_2_aa_sr4_cbs/ckpt_model/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..531f8951f24e137011ffd1b57e9f747f4fa669f8 --- /dev/null +++ b/ivl3-2b_ss2_2_aa_sr4_cbs/ckpt_model/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae2bdf9ab86563d24903333ab4098296993fbaf54682b00a9f64a59b450883f0 +size 7352 diff --git a/ivl3-2b_ss2_2_aa_sr4_cbs/evaluation_metrics.json b/ivl3-2b_ss2_2_aa_sr4_cbs/evaluation_metrics.json new file mode 100644 index 0000000000000000000000000000000000000000..036d33622664aeb81fc91a043c5c65d36f83962f --- /dev/null +++ b/ivl3-2b_ss2_2_aa_sr4_cbs/evaluation_metrics.json @@ -0,0 +1,182 @@ +[ + { + "val_dataset": "ReasonSeg|val", + "epoch": 1.0, + "eval_giou": 0.5472875237464905, + "eval_ciou": 0.6169445514678955 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 2.0, + "eval_giou": 0.5826247930526733, + "eval_ciou": 0.6471297740936279 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 3.0, + "eval_giou": 0.571751594543457, + "eval_ciou": 0.58400559425354 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 4.0, + "eval_giou": 0.6117531061172485, + "eval_ciou": 0.7337754368782043 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 5.0, + "eval_giou": 0.603766679763794, + "eval_ciou": 0.6782984733581543 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 6.0, + "eval_giou": 0.5942732691764832, + "eval_ciou": 0.6343610882759094 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 7.0, + "eval_giou": 0.5888954997062683, + "eval_ciou": 0.5462635159492493 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 8.0, + "eval_giou": 0.6029112339019775, + "eval_ciou": 0.672645092010498 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 9.0, + "eval_giou": 0.5829816460609436, + "eval_ciou": 0.6506213545799255 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 10.0, + "eval_giou": 0.5900739431381226, + "eval_ciou": 0.5817593932151794 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 11.0, + "eval_giou": 0.5904648900032043, + "eval_ciou": 0.6257218718528748 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 12.0, + "eval_giou": 0.6047389507293701, + "eval_ciou": 0.662787139415741 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 13.0, + "eval_giou": 0.632697582244873, + "eval_ciou": 0.6868629455566406 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 14.0, + "eval_giou": 0.6089114546775818, + "eval_ciou": 0.662788987159729 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 15.0, + "eval_giou": 0.6122798323631287, + "eval_ciou": 0.6807273030281067 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 16.0, + "eval_giou": 0.6197442412376404, + "eval_ciou": 0.7008298635482788 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 17.0, + "eval_giou": 0.6097345352172852, + "eval_ciou": 0.6946455836296082 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 18.0, + "eval_giou": 0.6123113036155701, + "eval_ciou": 0.6853691339492798 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 19.0, + "eval_giou": 0.6210272908210754, + "eval_ciou": 0.6771239042282104 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 20.0, + "eval_giou": 0.617154061794281, + "eval_ciou": 0.6615597605705261 + }, + { + "val_dataset": "ReasonSeg|test", + "epoch": 20.0, + "eval_giou": 0.61573725938797, + "eval_ciou": 0.6199108958244324 + }, + { + "val_dataset": "refcoco|unc|val", + "epoch": 20.0, + "eval_giou": 0.8253726959228516, + "eval_ciou": 0.8281757831573486 + }, + { + "val_dataset": "refcoco|unc|testA", + "epoch": 20.0, + "eval_giou": 0.839383602142334, + "eval_ciou": 0.8423187136650085 + }, + { + "val_dataset": "refcoco|unc|testB", + "epoch": 20.0, + "eval_giou": 0.8076790571212769, + "eval_ciou": 0.8090101480484009 + }, + { + "val_dataset": "refcoco+|unc|val", + "epoch": 20.0, + "eval_giou": 0.7864928245544434, + "eval_ciou": 0.7770533561706543 + }, + { + "val_dataset": "refcoco+|unc|testA", + "epoch": 20.0, + "eval_giou": 0.8159601092338562, + "eval_ciou": 0.8131332993507385 + }, + { + "val_dataset": "refcoco+|unc|testB", + "epoch": 20.0, + "eval_giou": 0.7557609677314758, + "eval_ciou": 0.7428407073020935 + }, + { + "val_dataset": "refcocog|umd|test", + "epoch": 20.0, + "eval_giou": 0.7897712588310242, + "eval_ciou": 0.7977313995361328 + }, + { + "val_dataset": "refcocog|umd|val", + "epoch": 20.0, + "eval_giou": 0.7885610461235046, + "eval_ciou": 0.7968337535858154 + }, + { + "val_dataset": "grefcoco|unc|val", + "epoch": 20.0, + "eval_giou": 0.35536685585975647, + "eval_ciou": 0.41349032521247864 + } +] \ No newline at end of file diff --git a/ivl3-2b_ss2_2_aa_sr4_cbs/events.out.tfevents.1759899360.bask-pg0309u36a.1966744.0 b/ivl3-2b_ss2_2_aa_sr4_cbs/events.out.tfevents.1759899360.bask-pg0309u36a.1966744.0 new file mode 100644 index 0000000000000000000000000000000000000000..428d66a9334a205f7f833009aa335e2cd1a6c7d0 --- /dev/null +++ b/ivl3-2b_ss2_2_aa_sr4_cbs/events.out.tfevents.1759899360.bask-pg0309u36a.1966744.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4316c0df3dd2f18914626eec6a94a4b077712da715160d288c664f9f079ae2a8 +size 420319 diff --git a/ivl3-2b_ss2_2_aa_sr4_cbs/runs/Oct08_05-55-56_bask-pg0309u36a/events.out.tfevents.1759899430.bask-pg0309u36a.1966744.1 b/ivl3-2b_ss2_2_aa_sr4_cbs/runs/Oct08_05-55-56_bask-pg0309u36a/events.out.tfevents.1759899430.bask-pg0309u36a.1966744.1 new file mode 100644 index 0000000000000000000000000000000000000000..21d14a262c356c4c45a216a65cef93b8b73d4791 --- /dev/null +++ b/ivl3-2b_ss2_2_aa_sr4_cbs/runs/Oct08_05-55-56_bask-pg0309u36a/events.out.tfevents.1759899430.bask-pg0309u36a.1966744.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f3cb7d64c9b2d4fa9b2938a235243fd234ba896ba3983d796fbfb53e356b0f2 +size 223357 diff --git a/ivl3-2b_ss2_2_aa_sr4_cbs/runs/Oct08_05-55-56_bask-pg0309u36a/events.out.tfevents.1759989193.bask-pg0309u36a.1966744.2 b/ivl3-2b_ss2_2_aa_sr4_cbs/runs/Oct08_05-55-56_bask-pg0309u36a/events.out.tfevents.1759989193.bask-pg0309u36a.1966744.2 new file mode 100644 index 0000000000000000000000000000000000000000..7d430d86415df4e42c19a40a4c1b2be324ef89ff --- /dev/null +++ b/ivl3-2b_ss2_2_aa_sr4_cbs/runs/Oct08_05-55-56_bask-pg0309u36a/events.out.tfevents.1759989193.bask-pg0309u36a.1966744.2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e29cf07dd781a544a2fd697efb429299042fcda8397002c3d4809627b4f141e +size 1548 diff --git a/ivl3-2b_ss2_2_ce_aa_seq_cbs_1/ckpt_model/config.json b/ivl3-2b_ss2_2_ce_aa_seq_cbs_1/ckpt_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1aaa0efe9e975d906488d234e8f6ec309c18056c --- /dev/null +++ b/ivl3-2b_ss2_2_ce_aa_seq_cbs_1/ckpt_model/config.json @@ -0,0 +1,143 @@ +{ + "architectures": [ + "InternVL3Self" + ], + "auto_map": { + "AutoConfig": "configuration_internvl_chat.InternVLChatConfig", + "AutoModel": "modeling_internvl_chat.InternVLChatModel", + "AutoModelForCausalLM": "modeling_internvl_chat.InternVLChatModel" + }, + "downsample_ratio": 0.5, + "dtype": "bfloat16", + "dynamic_image_size": true, + "eos_token_id": 151645, + "force_image_size": 448, + "hidden_size": 1536, + "image_fold": null, + "llm_config": { + "_attn_implementation_autoset": true, + "_name_or_path": "./pretrained/Qwen2.5-32B-Instruct", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 151643, + "dtype": "bfloat16", + "eos_token_id": 151643, + "hidden_act": "silu", + "hidden_size": 1536, + "initializer_range": 0.02, + "intermediate_size": 8960, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 70, + "model_type": "qwen2", + "moe_config": null, + "num_attention_heads": 12, + "num_hidden_layers": 28, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 2.0, + "rope_type": "dynamic", + "type": "dynamic" + }, + "rope_theta": 1000000.0, + "sliding_window": null, + "use_bfloat16": true, + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 151676 + }, + "max_dynamic_patch": 12, + "min_dynamic_patch": 1, + "model_type": "internvl_chat", + "output_attentions": false, + "pad2square": false, + "pad_token_id": 151643, + "ps_version": "v2", + "select_layer": -1, + "system_message": null, + "template": "internvl2_5", + "tie_word_embeddings": false, + "transformers_version": null, + "use_backbone_lora": 0, + "use_llm_lora": 0, + "use_thumbnail": true, + "vision_config": { + "_attn_implementation_autoset": true, + "_name_or_path": "OpenGVLab/InternViT-6B-448px-V1-5", + "architectures": [ + "InternVisionModel" + ], + "attention_dropout": 0.0, + "auto_map": { + "AutoConfig": "configuration_intern_vit.InternVisionConfig", + "AutoModel": "modeling_intern_vit.InternVisionModel" + }, + "capacity_factor": 1.2, + "drop_path_rate": 0.1, + "dropout": 0.0, + "dtype": "bfloat16", + "eval_capacity_factor": 1.4, + "hidden_act": "gelu", + "hidden_size": 1024, + "image_size": 448, + "initializer_factor": 0.1, + "initializer_range": 1e-10, + "intermediate_size": 4096, + "laux_allreduce": "all_nodes", + "layer_norm_eps": 1e-06, + "model_type": "intern_vit_6b", + "moe_coeff_ratio": 0.5, + "moe_intermediate_size": 768, + "moe_output_scale": 4.0, + "noisy_gate_policy": "RSample_before", + "norm_type": "layer_norm", + "num_attention_heads": 16, + "num_channels": 3, + "num_experts": 8, + "num_hidden_layers": 24, + "num_routed_experts": 4, + "num_shared_experts": 4, + "patch_size": 14, + "qk_normalization": false, + "qkv_bias": true, + "shared_expert_intermediate_size": 3072, + "use_bfloat16": true, + "use_flash_attn": true, + "use_moe": false, + "use_residual": true, + "use_rts": false, + "use_weighted_residual": false + } +} diff --git a/ivl3-2b_ss2_2_ce_aa_seq_cbs_1/ckpt_model/model.safetensors b/ivl3-2b_ss2_2_ce_aa_seq_cbs_1/ckpt_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bb1b43b1778a9e68c06a209296437439a7ff3ad7 --- /dev/null +++ b/ivl3-2b_ss2_2_ce_aa_seq_cbs_1/ckpt_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e68b4bf0ac4fb7851d80063b44ba49742267b6ce786b99e0e683a5011880a472 +size 4234672656 diff --git a/ivl3-2b_ss2_2_ce_aa_seq_cbs_1/ckpt_model/training_args.bin b/ivl3-2b_ss2_2_ce_aa_seq_cbs_1/ckpt_model/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..4eb7c90dc0ae0ab5d7820f226e3b31b91a34c48d --- /dev/null +++ b/ivl3-2b_ss2_2_ce_aa_seq_cbs_1/ckpt_model/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c483d8e4bccc79b2041f4255aa76528d81438cb33e8ab6094f36f23a18c22fe +size 7352 diff --git a/ivl3-2b_ss2_2_ce_aa_seq_cbs_1/evaluation_metrics.json b/ivl3-2b_ss2_2_ce_aa_seq_cbs_1/evaluation_metrics.json new file mode 100644 index 0000000000000000000000000000000000000000..fcb6b88258b63bfaa1b3bf624d29d90cb09f3503 --- /dev/null +++ b/ivl3-2b_ss2_2_ce_aa_seq_cbs_1/evaluation_metrics.json @@ -0,0 +1,104 @@ +[ + { + "val_dataset": "ReasonSeg|val", + "epoch": 0.14180939930424763, + "eval_giou": 0.5170324444770813, + "eval_ciou": 0.5942871570587158 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 0.28361879860849526, + "eval_giou": 0.5256783366203308, + "eval_ciou": 0.6608301997184753 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 0.4254281979127429, + "eval_giou": 0.5604663491249084, + "eval_ciou": 0.6650714874267578 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 0.5672375972169905, + "eval_giou": 0.5549952387809753, + "eval_ciou": 0.6755744218826294 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 0.7090469965212381, + "eval_giou": 0.5817117691040039, + "eval_ciou": 0.6808510422706604 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 0.8508563958254858, + "eval_giou": 0.5875627398490906, + "eval_ciou": 0.6574282050132751 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 0.9926657951297334, + "eval_giou": 0.6035187840461731, + "eval_ciou": 0.6785731911659241 + }, + { + "val_dataset": "ReasonSeg|test", + "epoch": 1.0, + "eval_giou": 0.5871202349662781, + "eval_ciou": 0.6128882765769958 + }, + { + "val_dataset": "refcoco|unc|val", + "epoch": 1.0, + "eval_giou": 0.7931445240974426, + "eval_ciou": 0.7957648634910583 + }, + { + "val_dataset": "refcoco|unc|testA", + "epoch": 1.0, + "eval_giou": 0.8122290372848511, + "eval_ciou": 0.8159088492393494 + }, + { + "val_dataset": "refcoco|unc|testB", + "epoch": 1.0, + "eval_giou": 0.771130383014679, + "eval_ciou": 0.7735289335250854 + }, + { + "val_dataset": "refcoco+|unc|val", + "epoch": 1.0, + "eval_giou": 0.7441046833992004, + "eval_ciou": 0.7393963932991028 + }, + { + "val_dataset": "refcoco+|unc|testA", + "epoch": 1.0, + "eval_giou": 0.7816813588142395, + "eval_ciou": 0.7822470664978027 + }, + { + "val_dataset": "refcoco+|unc|testB", + "epoch": 1.0, + "eval_giou": 0.7043335437774658, + "eval_ciou": 0.7011061906814575 + }, + { + "val_dataset": "refcocog|umd|test", + "epoch": 1.0, + "eval_giou": 0.7601777911186218, + "eval_ciou": 0.7689756751060486 + }, + { + "val_dataset": "refcocog|umd|val", + "epoch": 1.0, + "eval_giou": 0.7574023604393005, + "eval_ciou": 0.7668260931968689 + }, + { + "val_dataset": "grefcoco|unc|val", + "epoch": 1.0, + "eval_giou": 0.3440900146961212, + "eval_ciou": 0.40447333455085754 + } +] \ No newline at end of file diff --git a/ivl3-2b_ss2_2_ce_aa_seq_cbs_1/events.out.tfevents.1760049897.bask-pg0309u36a.421581.0 b/ivl3-2b_ss2_2_ce_aa_seq_cbs_1/events.out.tfevents.1760049897.bask-pg0309u36a.421581.0 new file mode 100644 index 0000000000000000000000000000000000000000..6a9d1d407219695544a754fcab80085394c03e8f --- /dev/null +++ b/ivl3-2b_ss2_2_ce_aa_seq_cbs_1/events.out.tfevents.1760049897.bask-pg0309u36a.421581.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a814830c21483a873a9fb7aae3df2dd0da596f8e099f805420b4f97ddee661e +size 237250 diff --git a/ivl3-2b_ss2_2_ce_aa_seq_cbs_1/runs/Oct09_23-44-53_bask-pg0309u36a/events.out.tfevents.1760049961.bask-pg0309u36a.421581.1 b/ivl3-2b_ss2_2_ce_aa_seq_cbs_1/runs/Oct09_23-44-53_bask-pg0309u36a/events.out.tfevents.1760049961.bask-pg0309u36a.421581.1 new file mode 100644 index 0000000000000000000000000000000000000000..5514cb0af026f944d8efae06530fbbe2c53f4c74 --- /dev/null +++ b/ivl3-2b_ss2_2_ce_aa_seq_cbs_1/runs/Oct09_23-44-53_bask-pg0309u36a/events.out.tfevents.1760049961.bask-pg0309u36a.421581.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:696272af75f56d031defab3f9ee22e1ff2152eaf31d3d26dd707e500237f3873 +size 129472 diff --git a/ivl3-2b_ss2_2_ce_aa_seq_cbs_1/runs/Oct09_23-44-53_bask-pg0309u36a/events.out.tfevents.1760090134.bask-pg0309u36a.421581.2 b/ivl3-2b_ss2_2_ce_aa_seq_cbs_1/runs/Oct09_23-44-53_bask-pg0309u36a/events.out.tfevents.1760090134.bask-pg0309u36a.421581.2 new file mode 100644 index 0000000000000000000000000000000000000000..32872a522297547bd8068372ae671c295deb40c0 --- /dev/null +++ b/ivl3-2b_ss2_2_ce_aa_seq_cbs_1/runs/Oct09_23-44-53_bask-pg0309u36a/events.out.tfevents.1760090134.bask-pg0309u36a.421581.2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1ef9166bd90585ddfb26787975266c8673785ef08e917d33dcc3947b38b8781 +size 1548 diff --git a/ivl3-2b_ss2_2_ce_aa_sr4_cbs/ckpt_model/config.json b/ivl3-2b_ss2_2_ce_aa_sr4_cbs/ckpt_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1aaa0efe9e975d906488d234e8f6ec309c18056c --- /dev/null +++ b/ivl3-2b_ss2_2_ce_aa_sr4_cbs/ckpt_model/config.json @@ -0,0 +1,143 @@ +{ + "architectures": [ + "InternVL3Self" + ], + "auto_map": { + "AutoConfig": "configuration_internvl_chat.InternVLChatConfig", + "AutoModel": "modeling_internvl_chat.InternVLChatModel", + "AutoModelForCausalLM": "modeling_internvl_chat.InternVLChatModel" + }, + "downsample_ratio": 0.5, + "dtype": "bfloat16", + "dynamic_image_size": true, + "eos_token_id": 151645, + "force_image_size": 448, + "hidden_size": 1536, + "image_fold": null, + "llm_config": { + "_attn_implementation_autoset": true, + "_name_or_path": "./pretrained/Qwen2.5-32B-Instruct", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 151643, + "dtype": "bfloat16", + "eos_token_id": 151643, + "hidden_act": "silu", + "hidden_size": 1536, + "initializer_range": 0.02, + "intermediate_size": 8960, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 70, + "model_type": "qwen2", + "moe_config": null, + "num_attention_heads": 12, + "num_hidden_layers": 28, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 2.0, + "rope_type": "dynamic", + "type": "dynamic" + }, + "rope_theta": 1000000.0, + "sliding_window": null, + "use_bfloat16": true, + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 151676 + }, + "max_dynamic_patch": 12, + "min_dynamic_patch": 1, + "model_type": "internvl_chat", + "output_attentions": false, + "pad2square": false, + "pad_token_id": 151643, + "ps_version": "v2", + "select_layer": -1, + "system_message": null, + "template": "internvl2_5", + "tie_word_embeddings": false, + "transformers_version": null, + "use_backbone_lora": 0, + "use_llm_lora": 0, + "use_thumbnail": true, + "vision_config": { + "_attn_implementation_autoset": true, + "_name_or_path": "OpenGVLab/InternViT-6B-448px-V1-5", + "architectures": [ + "InternVisionModel" + ], + "attention_dropout": 0.0, + "auto_map": { + "AutoConfig": "configuration_intern_vit.InternVisionConfig", + "AutoModel": "modeling_intern_vit.InternVisionModel" + }, + "capacity_factor": 1.2, + "drop_path_rate": 0.1, + "dropout": 0.0, + "dtype": "bfloat16", + "eval_capacity_factor": 1.4, + "hidden_act": "gelu", + "hidden_size": 1024, + "image_size": 448, + "initializer_factor": 0.1, + "initializer_range": 1e-10, + "intermediate_size": 4096, + "laux_allreduce": "all_nodes", + "layer_norm_eps": 1e-06, + "model_type": "intern_vit_6b", + "moe_coeff_ratio": 0.5, + "moe_intermediate_size": 768, + "moe_output_scale": 4.0, + "noisy_gate_policy": "RSample_before", + "norm_type": "layer_norm", + "num_attention_heads": 16, + "num_channels": 3, + "num_experts": 8, + "num_hidden_layers": 24, + "num_routed_experts": 4, + "num_shared_experts": 4, + "patch_size": 14, + "qk_normalization": false, + "qkv_bias": true, + "shared_expert_intermediate_size": 3072, + "use_bfloat16": true, + "use_flash_attn": true, + "use_moe": false, + "use_residual": true, + "use_rts": false, + "use_weighted_residual": false + } +} diff --git a/ivl3-2b_ss2_2_ce_aa_sr4_cbs/ckpt_model/model.safetensors b/ivl3-2b_ss2_2_ce_aa_sr4_cbs/ckpt_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dffcbffcc12457c538969bd800d693a92f30d378 --- /dev/null +++ b/ivl3-2b_ss2_2_ce_aa_sr4_cbs/ckpt_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8bf3469a88769c9cf80872f702a2b69af7fc3de3e13b5fc72e57313bc7e6fd6d +size 4234672656 diff --git a/ivl3-2b_ss2_2_ce_aa_sr4_cbs/ckpt_model/training_args.bin b/ivl3-2b_ss2_2_ce_aa_sr4_cbs/ckpt_model/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..3f6324a64dd8b75c5dfcb0abc19b992ae8d64860 --- /dev/null +++ b/ivl3-2b_ss2_2_ce_aa_sr4_cbs/ckpt_model/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbd27079f7bb014416e853c6dd7c8cb2cc3a0ed270d7689a6ce1c20a394b44ed +size 7352 diff --git a/ivl3-2b_ss2_2_ce_aa_sr4_cbs/evaluation_metrics.json b/ivl3-2b_ss2_2_ce_aa_sr4_cbs/evaluation_metrics.json new file mode 100644 index 0000000000000000000000000000000000000000..2eb6b4e27dece2a650f665b17786dfcc371a8e3a --- /dev/null +++ b/ivl3-2b_ss2_2_ce_aa_sr4_cbs/evaluation_metrics.json @@ -0,0 +1,182 @@ +[ + { + "val_dataset": "ReasonSeg|val", + "epoch": 1.0, + "eval_giou": 0.545238733291626, + "eval_ciou": 0.6227507591247559 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 2.0, + "eval_giou": 0.5574519634246826, + "eval_ciou": 0.5911077857017517 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 3.0, + "eval_giou": 0.5884220600128174, + "eval_ciou": 0.6480932831764221 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 4.0, + "eval_giou": 0.567903459072113, + "eval_ciou": 0.6106956601142883 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 5.0, + "eval_giou": 0.5836549401283264, + "eval_ciou": 0.6618658900260925 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 6.0, + "eval_giou": 0.6009659767150879, + "eval_ciou": 0.601414144039154 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 7.0, + "eval_giou": 0.5822204947471619, + "eval_ciou": 0.6209443211555481 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 8.0, + "eval_giou": 0.5958440899848938, + "eval_ciou": 0.6226491332054138 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 9.0, + "eval_giou": 0.5826206803321838, + "eval_ciou": 0.6304926872253418 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 10.0, + "eval_giou": 0.6073910593986511, + "eval_ciou": 0.6384271383285522 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 11.0, + "eval_giou": 0.5889663100242615, + "eval_ciou": 0.5859989523887634 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 12.0, + "eval_giou": 0.5989387631416321, + "eval_ciou": 0.5917373895645142 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 13.0, + "eval_giou": 0.6207078695297241, + "eval_ciou": 0.6845619678497314 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 14.0, + "eval_giou": 0.6023203730583191, + "eval_ciou": 0.6288123726844788 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 15.0, + "eval_giou": 0.6015496850013733, + "eval_ciou": 0.6256543397903442 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 16.0, + "eval_giou": 0.6224597692489624, + "eval_ciou": 0.6749593019485474 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 17.0, + "eval_giou": 0.6151285767555237, + "eval_ciou": 0.6607990860939026 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 18.0, + "eval_giou": 0.6089848279953003, + "eval_ciou": 0.6714296936988831 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 19.0, + "eval_giou": 0.6193683743476868, + "eval_ciou": 0.66315758228302 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 20.0, + "eval_giou": 0.6168807148933411, + "eval_ciou": 0.6491692066192627 + }, + { + "val_dataset": "ReasonSeg|test", + "epoch": 20.0, + "eval_giou": 0.6177780628204346, + "eval_ciou": 0.623653769493103 + }, + { + "val_dataset": "refcoco|unc|val", + "epoch": 20.0, + "eval_giou": 0.823724627494812, + "eval_ciou": 0.8269159197807312 + }, + { + "val_dataset": "refcoco|unc|testA", + "epoch": 20.0, + "eval_giou": 0.8385549783706665, + "eval_ciou": 0.8419386148452759 + }, + { + "val_dataset": "refcoco|unc|testB", + "epoch": 20.0, + "eval_giou": 0.8077682852745056, + "eval_ciou": 0.8096453547477722 + }, + { + "val_dataset": "refcoco+|unc|val", + "epoch": 20.0, + "eval_giou": 0.7876097559928894, + "eval_ciou": 0.7777907848358154 + }, + { + "val_dataset": "refcoco+|unc|testA", + "epoch": 20.0, + "eval_giou": 0.8152230978012085, + "eval_ciou": 0.8130303025245667 + }, + { + "val_dataset": "refcoco+|unc|testB", + "epoch": 20.0, + "eval_giou": 0.7466538548469543, + "eval_ciou": 0.7298356294631958 + }, + { + "val_dataset": "refcocog|umd|test", + "epoch": 20.0, + "eval_giou": 0.7895915508270264, + "eval_ciou": 0.7953519821166992 + }, + { + "val_dataset": "refcocog|umd|val", + "epoch": 20.0, + "eval_giou": 0.7863814830780029, + "eval_ciou": 0.790006697177887 + }, + { + "val_dataset": "grefcoco|unc|val", + "epoch": 20.0, + "eval_giou": 0.34861522912979126, + "eval_ciou": 0.4008709788322449 + } +] \ No newline at end of file diff --git a/ivl3-2b_ss2_2_ce_aa_sr4_cbs/events.out.tfevents.1759877835.bask-pg0309u12a.1530010.0 b/ivl3-2b_ss2_2_ce_aa_sr4_cbs/events.out.tfevents.1759877835.bask-pg0309u12a.1530010.0 new file mode 100644 index 0000000000000000000000000000000000000000..27f7193a32b69e4c755966d58c88ac2ab274ac2c --- /dev/null +++ b/ivl3-2b_ss2_2_ce_aa_sr4_cbs/events.out.tfevents.1759877835.bask-pg0309u12a.1530010.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5bdf4f69c712289c45dc7f3d2fc1c00080ed867cb126c8893a7cdb9e6410c575 +size 5269 diff --git a/ivl3-2b_ss2_2_ce_aa_sr4_cbs/events.out.tfevents.1759878745.bask-pg0309u12a.1549446.0 b/ivl3-2b_ss2_2_ce_aa_sr4_cbs/events.out.tfevents.1759878745.bask-pg0309u12a.1549446.0 new file mode 100644 index 0000000000000000000000000000000000000000..dd7aa5388f8a97da58375ed3e83e3b39fd6db383 --- /dev/null +++ b/ivl3-2b_ss2_2_ce_aa_sr4_cbs/events.out.tfevents.1759878745.bask-pg0309u12a.1549446.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4d3e677ef10d742ca354d04dc083cd8fac9d4f3438fcb2105ec954c1cb39554 +size 420319 diff --git a/ivl3-2b_ss2_2_ce_aa_sr4_cbs/runs/Oct07_23-57-11_bask-pg0309u12a/events.out.tfevents.1759877899.bask-pg0309u12a.1530010.1 b/ivl3-2b_ss2_2_ce_aa_sr4_cbs/runs/Oct07_23-57-11_bask-pg0309u12a/events.out.tfevents.1759877899.bask-pg0309u12a.1530010.1 new file mode 100644 index 0000000000000000000000000000000000000000..59c89139b78f9003b22c20e758802b8077603174 --- /dev/null +++ b/ivl3-2b_ss2_2_ce_aa_sr4_cbs/runs/Oct07_23-57-11_bask-pg0309u12a/events.out.tfevents.1759877899.bask-pg0309u12a.1530010.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ef6b38d60c181687f36e0184bd79a6ca1ad752797613b8b065873a3b4246d0b +size 11734 diff --git a/ivl3-2b_ss2_2_ce_aa_sr4_cbs/runs/Oct08_00-12-21_bask-pg0309u12a/events.out.tfevents.1759878805.bask-pg0309u12a.1549446.1 b/ivl3-2b_ss2_2_ce_aa_sr4_cbs/runs/Oct08_00-12-21_bask-pg0309u12a/events.out.tfevents.1759878805.bask-pg0309u12a.1549446.1 new file mode 100644 index 0000000000000000000000000000000000000000..e2fa05d7dd8e7819801b741d66f6a72b1a78be2c --- /dev/null +++ b/ivl3-2b_ss2_2_ce_aa_sr4_cbs/runs/Oct08_00-12-21_bask-pg0309u12a/events.out.tfevents.1759878805.bask-pg0309u12a.1549446.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2aca35e8105442e8225c238eb97ea990547dd7850e1d79d809ea06ab4a8bbf52 +size 223363 diff --git a/ivl3-2b_ss2_2_ce_aa_sr4_cbs/runs/Oct08_00-12-21_bask-pg0309u12a/events.out.tfevents.1759967908.bask-pg0309u12a.1549446.2 b/ivl3-2b_ss2_2_ce_aa_sr4_cbs/runs/Oct08_00-12-21_bask-pg0309u12a/events.out.tfevents.1759967908.bask-pg0309u12a.1549446.2 new file mode 100644 index 0000000000000000000000000000000000000000..775e21ca224deb1981beb8baebc1338f20819a13 --- /dev/null +++ b/ivl3-2b_ss2_2_ce_aa_sr4_cbs/runs/Oct08_00-12-21_bask-pg0309u12a/events.out.tfevents.1759967908.bask-pg0309u12a.1549446.2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea46edb0001154b124f3e0221461332c2861bdb219b80ce8118ea43f68f236d3 +size 1548 diff --git a/lisa-ivl3-2b_s2_2_vlora_sr/ckpt_model/config.json b/lisa-ivl3-2b_s2_2_vlora_sr/ckpt_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1aaa0efe9e975d906488d234e8f6ec309c18056c --- /dev/null +++ b/lisa-ivl3-2b_s2_2_vlora_sr/ckpt_model/config.json @@ -0,0 +1,143 @@ +{ + "architectures": [ + "InternVL3Self" + ], + "auto_map": { + "AutoConfig": "configuration_internvl_chat.InternVLChatConfig", + "AutoModel": "modeling_internvl_chat.InternVLChatModel", + "AutoModelForCausalLM": "modeling_internvl_chat.InternVLChatModel" + }, + "downsample_ratio": 0.5, + "dtype": "bfloat16", + "dynamic_image_size": true, + "eos_token_id": 151645, + "force_image_size": 448, + "hidden_size": 1536, + "image_fold": null, + "llm_config": { + "_attn_implementation_autoset": true, + "_name_or_path": "./pretrained/Qwen2.5-32B-Instruct", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 151643, + "dtype": "bfloat16", + "eos_token_id": 151643, + "hidden_act": "silu", + "hidden_size": 1536, + "initializer_range": 0.02, + "intermediate_size": 8960, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 70, + "model_type": "qwen2", + "moe_config": null, + "num_attention_heads": 12, + "num_hidden_layers": 28, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 2.0, + "rope_type": "dynamic", + "type": "dynamic" + }, + "rope_theta": 1000000.0, + "sliding_window": null, + "use_bfloat16": true, + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 151676 + }, + "max_dynamic_patch": 12, + "min_dynamic_patch": 1, + "model_type": "internvl_chat", + "output_attentions": false, + "pad2square": false, + "pad_token_id": 151643, + "ps_version": "v2", + "select_layer": -1, + "system_message": null, + "template": "internvl2_5", + "tie_word_embeddings": false, + "transformers_version": null, + "use_backbone_lora": 0, + "use_llm_lora": 0, + "use_thumbnail": true, + "vision_config": { + "_attn_implementation_autoset": true, + "_name_or_path": "OpenGVLab/InternViT-6B-448px-V1-5", + "architectures": [ + "InternVisionModel" + ], + "attention_dropout": 0.0, + "auto_map": { + "AutoConfig": "configuration_intern_vit.InternVisionConfig", + "AutoModel": "modeling_intern_vit.InternVisionModel" + }, + "capacity_factor": 1.2, + "drop_path_rate": 0.1, + "dropout": 0.0, + "dtype": "bfloat16", + "eval_capacity_factor": 1.4, + "hidden_act": "gelu", + "hidden_size": 1024, + "image_size": 448, + "initializer_factor": 0.1, + "initializer_range": 1e-10, + "intermediate_size": 4096, + "laux_allreduce": "all_nodes", + "layer_norm_eps": 1e-06, + "model_type": "intern_vit_6b", + "moe_coeff_ratio": 0.5, + "moe_intermediate_size": 768, + "moe_output_scale": 4.0, + "noisy_gate_policy": "RSample_before", + "norm_type": "layer_norm", + "num_attention_heads": 16, + "num_channels": 3, + "num_experts": 8, + "num_hidden_layers": 24, + "num_routed_experts": 4, + "num_shared_experts": 4, + "patch_size": 14, + "qk_normalization": false, + "qkv_bias": true, + "shared_expert_intermediate_size": 3072, + "use_bfloat16": true, + "use_flash_attn": true, + "use_moe": false, + "use_residual": true, + "use_rts": false, + "use_weighted_residual": false + } +} diff --git a/lisa-ivl3-2b_s2_2_vlora_sr/ckpt_model/model.safetensors b/lisa-ivl3-2b_s2_2_vlora_sr/ckpt_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..399712f9a4768e02d92c7d7f9120dc387a216d0d --- /dev/null +++ b/lisa-ivl3-2b_s2_2_vlora_sr/ckpt_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30e47cc709ec1ba69f2d9d95a1cb1772d8724fbe12b8127c78a3f24363254eb2 +size 4234672656 diff --git a/lisa-ivl3-2b_s2_2_vlora_sr/ckpt_model/training_args.bin b/lisa-ivl3-2b_s2_2_vlora_sr/ckpt_model/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..701f89c92decfc757beecfe21220b96d8a2f31dc --- /dev/null +++ b/lisa-ivl3-2b_s2_2_vlora_sr/ckpt_model/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:376cf9ecb749a538a45e36600313b7a343b1da588de84846095f57565d33118a +size 7352 diff --git a/lisa-ivl3-2b_s2_2_vlora_sr/evaluation_metrics.json b/lisa-ivl3-2b_s2_2_vlora_sr/evaluation_metrics.json new file mode 100644 index 0000000000000000000000000000000000000000..564876758ec2b5a81b14a23487f33a8e8a610edd --- /dev/null +++ b/lisa-ivl3-2b_s2_2_vlora_sr/evaluation_metrics.json @@ -0,0 +1,134 @@ +[ + { + "val_dataset": "ReasonSeg|val", + "epoch": 1.0, + "eval_giou": 0.5275816917419434, + "eval_ciou": 0.6127659678459167 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 2.0, + "eval_giou": 0.5785483121871948, + "eval_ciou": 0.6731550693511963 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 3.0, + "eval_giou": 0.5671323537826538, + "eval_ciou": 0.6406646966934204 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 4.0, + "eval_giou": 0.5716913342475891, + "eval_ciou": 0.6880943775177002 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 5.0, + "eval_giou": 0.6032226085662842, + "eval_ciou": 0.669122576713562 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 6.0, + "eval_giou": 0.6153417229652405, + "eval_ciou": 0.6467410326004028 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 7.0, + "eval_giou": 0.6117483377456665, + "eval_ciou": 0.6864674687385559 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 8.0, + "eval_giou": 0.613699734210968, + "eval_ciou": 0.6648719906806946 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 9.0, + "eval_giou": 0.6249428987503052, + "eval_ciou": 0.6983655691146851 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 10.0, + "eval_giou": 0.6295068860054016, + "eval_ciou": 0.6981709599494934 + }, + { + "val_dataset": "ReasonSeg|test", + "epoch": 10.0, + "eval_giou": 0.6091115474700928, + "eval_ciou": 0.6346589922904968 + }, + { + "val_dataset": "refcoco|unc|val", + "epoch": 10.0, + "eval_giou": 0.7866575121879578, + "eval_ciou": 0.7891850471496582 + }, + { + "val_dataset": "refcoco|unc|testA", + "epoch": 10.0, + "eval_giou": 0.8079327344894409, + "eval_ciou": 0.8123739361763 + }, + { + "val_dataset": "refcoco|unc|testB", + "epoch": 10.0, + "eval_giou": 0.7600301504135132, + "eval_ciou": 0.7600919008255005 + }, + { + "val_dataset": "refcoco+|unc|val", + "epoch": 10.0, + "eval_giou": 0.7380849719047546, + "eval_ciou": 0.7316219210624695 + }, + { + "val_dataset": "refcoco+|unc|testA", + "epoch": 10.0, + "eval_giou": 0.7775301337242126, + "eval_ciou": 0.7753238677978516 + }, + { + "val_dataset": "refcoco+|unc|testB", + "epoch": 10.0, + "eval_giou": 0.6858305335044861, + "eval_ciou": 0.6726531982421875 + }, + { + "val_dataset": "refcocog|umd|test", + "epoch": 10.0, + "eval_giou": 0.7513312697410583, + "eval_ciou": 0.7607209086418152 + }, + { + "val_dataset": "refcocog|umd|val", + "epoch": 10.0, + "eval_giou": 0.7468341588973999, + "eval_ciou": 0.7547957897186279 + }, + { + "val_dataset": "grefcoco|unc|val", + "epoch": 10.0, + "eval_giou": 0.3461865484714508, + "eval_ciou": 0.4059288203716278 + }, + { + "val_dataset": "grefcoco|unc|testA", + "epoch": 10.0, + "eval_giou": 0.5068609714508057, + "eval_ciou": 0.5432214736938477 + }, + { + "val_dataset": "grefcoco|unc|testB", + "epoch": 10.0, + "eval_giou": 0.4215650260448456, + "eval_ciou": 0.46161597967147827 + } +] \ No newline at end of file diff --git a/lisa-ivl3-2b_s2_2_vlora_sr/events.out.tfevents.1759802135.bask-pg0308u03a.2088475.0 b/lisa-ivl3-2b_s2_2_vlora_sr/events.out.tfevents.1759802135.bask-pg0308u03a.2088475.0 new file mode 100644 index 0000000000000000000000000000000000000000..d03392157e224906d748ab767924c41b5918f8b0 --- /dev/null +++ b/lisa-ivl3-2b_s2_2_vlora_sr/events.out.tfevents.1759802135.bask-pg0308u03a.2088475.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9753975d394fb29fe790a725030cfb2eb7164a9c9772c6c58f150b86032ff6d2 +size 213843 diff --git a/lisa-ivl3-2b_s2_2_vlora_sr/runs/Oct07_02-55-32_bask-pg0308u03a/events.out.tfevents.1759802218.bask-pg0308u03a.2088475.1 b/lisa-ivl3-2b_s2_2_vlora_sr/runs/Oct07_02-55-32_bask-pg0308u03a/events.out.tfevents.1759802218.bask-pg0308u03a.2088475.1 new file mode 100644 index 0000000000000000000000000000000000000000..28732909b92f527b8482fc750b9d7a2e6da3175b --- /dev/null +++ b/lisa-ivl3-2b_s2_2_vlora_sr/runs/Oct07_02-55-32_bask-pg0308u03a/events.out.tfevents.1759802218.bask-pg0308u03a.2088475.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb122b4b83d6c98e0202bcb24e7e4d8b2ecf6127e8749eb7ad7e39ee8ee4bee3 +size 116402 diff --git a/lisa-ivl3-2b_s2_2_vlora_sr/runs/Oct07_02-55-32_bask-pg0308u03a/events.out.tfevents.1759828851.bask-pg0308u03a.2088475.2 b/lisa-ivl3-2b_s2_2_vlora_sr/runs/Oct07_02-55-32_bask-pg0308u03a/events.out.tfevents.1759828851.bask-pg0308u03a.2088475.2 new file mode 100644 index 0000000000000000000000000000000000000000..43bb5f46c246826cd32b04aeaaf60217f5f81da0 --- /dev/null +++ b/lisa-ivl3-2b_s2_2_vlora_sr/runs/Oct07_02-55-32_bask-pg0308u03a/events.out.tfevents.1759828851.bask-pg0308u03a.2088475.2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:049fe35ce139736f5cff725d493b6559f3cb8ebe44b73c1dada7d658c239b209 +size 1840 diff --git a/lisa-ivl3-2b_ss2_2_ce_aa_sr4/ckpt_model/config.json b/lisa-ivl3-2b_ss2_2_ce_aa_sr4/ckpt_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1aaa0efe9e975d906488d234e8f6ec309c18056c --- /dev/null +++ b/lisa-ivl3-2b_ss2_2_ce_aa_sr4/ckpt_model/config.json @@ -0,0 +1,143 @@ +{ + "architectures": [ + "InternVL3Self" + ], + "auto_map": { + "AutoConfig": "configuration_internvl_chat.InternVLChatConfig", + "AutoModel": "modeling_internvl_chat.InternVLChatModel", + "AutoModelForCausalLM": "modeling_internvl_chat.InternVLChatModel" + }, + "downsample_ratio": 0.5, + "dtype": "bfloat16", + "dynamic_image_size": true, + "eos_token_id": 151645, + "force_image_size": 448, + "hidden_size": 1536, + "image_fold": null, + "llm_config": { + "_attn_implementation_autoset": true, + "_name_or_path": "./pretrained/Qwen2.5-32B-Instruct", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 151643, + "dtype": "bfloat16", + "eos_token_id": 151643, + "hidden_act": "silu", + "hidden_size": 1536, + "initializer_range": 0.02, + "intermediate_size": 8960, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 70, + "model_type": "qwen2", + "moe_config": null, + "num_attention_heads": 12, + "num_hidden_layers": 28, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 2.0, + "rope_type": "dynamic", + "type": "dynamic" + }, + "rope_theta": 1000000.0, + "sliding_window": null, + "use_bfloat16": true, + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 151676 + }, + "max_dynamic_patch": 12, + "min_dynamic_patch": 1, + "model_type": "internvl_chat", + "output_attentions": false, + "pad2square": false, + "pad_token_id": 151643, + "ps_version": "v2", + "select_layer": -1, + "system_message": null, + "template": "internvl2_5", + "tie_word_embeddings": false, + "transformers_version": null, + "use_backbone_lora": 0, + "use_llm_lora": 0, + "use_thumbnail": true, + "vision_config": { + "_attn_implementation_autoset": true, + "_name_or_path": "OpenGVLab/InternViT-6B-448px-V1-5", + "architectures": [ + "InternVisionModel" + ], + "attention_dropout": 0.0, + "auto_map": { + "AutoConfig": "configuration_intern_vit.InternVisionConfig", + "AutoModel": "modeling_intern_vit.InternVisionModel" + }, + "capacity_factor": 1.2, + "drop_path_rate": 0.1, + "dropout": 0.0, + "dtype": "bfloat16", + "eval_capacity_factor": 1.4, + "hidden_act": "gelu", + "hidden_size": 1024, + "image_size": 448, + "initializer_factor": 0.1, + "initializer_range": 1e-10, + "intermediate_size": 4096, + "laux_allreduce": "all_nodes", + "layer_norm_eps": 1e-06, + "model_type": "intern_vit_6b", + "moe_coeff_ratio": 0.5, + "moe_intermediate_size": 768, + "moe_output_scale": 4.0, + "noisy_gate_policy": "RSample_before", + "norm_type": "layer_norm", + "num_attention_heads": 16, + "num_channels": 3, + "num_experts": 8, + "num_hidden_layers": 24, + "num_routed_experts": 4, + "num_shared_experts": 4, + "patch_size": 14, + "qk_normalization": false, + "qkv_bias": true, + "shared_expert_intermediate_size": 3072, + "use_bfloat16": true, + "use_flash_attn": true, + "use_moe": false, + "use_residual": true, + "use_rts": false, + "use_weighted_residual": false + } +} diff --git a/lisa-ivl3-2b_ss2_2_ce_aa_sr4/ckpt_model/model.safetensors b/lisa-ivl3-2b_ss2_2_ce_aa_sr4/ckpt_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..496f12fff9db4263ab39db677001ee8cb01d0da8 --- /dev/null +++ b/lisa-ivl3-2b_ss2_2_ce_aa_sr4/ckpt_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf8abfa0531fab5dd062f2adc5d1998d253a3534af2fbee5966ca5f440b8b56d +size 4234672656 diff --git a/lisa-ivl3-2b_ss2_2_ce_aa_sr4/ckpt_model/training_args.bin b/lisa-ivl3-2b_ss2_2_ce_aa_sr4/ckpt_model/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..452debb137bcfcb22488338f1223573710a5e338 --- /dev/null +++ b/lisa-ivl3-2b_ss2_2_ce_aa_sr4/ckpt_model/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6f838b765f1d9e641210de93267752a4124e94ae40523c3d484eed6abea4144 +size 7352 diff --git a/lisa-ivl3-2b_ss2_2_ce_aa_sr4/evaluation_metrics.json b/lisa-ivl3-2b_ss2_2_ce_aa_sr4/evaluation_metrics.json new file mode 100644 index 0000000000000000000000000000000000000000..2e21c01cbe0cd1acae00b971a17eb16946b5508c --- /dev/null +++ b/lisa-ivl3-2b_ss2_2_ce_aa_sr4/evaluation_metrics.json @@ -0,0 +1,182 @@ +[ + { + "val_dataset": "ReasonSeg|val", + "epoch": 1.0, + "eval_giou": 0.5319168567657471, + "eval_ciou": 0.6217775344848633 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 2.0, + "eval_giou": 0.5545085668563843, + "eval_ciou": 0.5805025696754456 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 3.0, + "eval_giou": 0.5865544676780701, + "eval_ciou": 0.5943950414657593 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 4.0, + "eval_giou": 0.5605677366256714, + "eval_ciou": 0.618609607219696 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 5.0, + "eval_giou": 0.5957074165344238, + "eval_ciou": 0.6184157729148865 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 6.0, + "eval_giou": 0.5842458605766296, + "eval_ciou": 0.5878432989120483 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 7.0, + "eval_giou": 0.5693845748901367, + "eval_ciou": 0.5511792302131653 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 8.0, + "eval_giou": 0.589174747467041, + "eval_ciou": 0.601881742477417 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 9.0, + "eval_giou": 0.5905624628067017, + "eval_ciou": 0.5573661923408508 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 10.0, + "eval_giou": 0.6161700487136841, + "eval_ciou": 0.6625725626945496 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 11.0, + "eval_giou": 0.6013333797454834, + "eval_ciou": 0.6199454665184021 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 12.0, + "eval_giou": 0.5980168581008911, + "eval_ciou": 0.6256346106529236 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 13.0, + "eval_giou": 0.6127091646194458, + "eval_ciou": 0.6268919706344604 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 14.0, + "eval_giou": 0.6001991033554077, + "eval_ciou": 0.6283772587776184 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 15.0, + "eval_giou": 0.5891889333724976, + "eval_ciou": 0.6160405278205872 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 16.0, + "eval_giou": 0.583220899105072, + "eval_ciou": 0.6409042477607727 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 17.0, + "eval_giou": 0.5922881364822388, + "eval_ciou": 0.6401846408843994 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 18.0, + "eval_giou": 0.5957611799240112, + "eval_ciou": 0.6475955843925476 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 19.0, + "eval_giou": 0.606143593788147, + "eval_ciou": 0.6534363031387329 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 20.0, + "eval_giou": 0.6004212498664856, + "eval_ciou": 0.6504445672035217 + }, + { + "val_dataset": "ReasonSeg|test", + "epoch": 20.0, + "eval_giou": 0.6277483105659485, + "eval_ciou": 0.6647483706474304 + }, + { + "val_dataset": "refcoco|unc|val", + "epoch": 20.0, + "eval_giou": 0.8239029049873352, + "eval_ciou": 0.825985848903656 + }, + { + "val_dataset": "refcoco|unc|testA", + "epoch": 20.0, + "eval_giou": 0.8409322500228882, + "eval_ciou": 0.8456533551216125 + }, + { + "val_dataset": "refcoco|unc|testB", + "epoch": 20.0, + "eval_giou": 0.8107873201370239, + "eval_ciou": 0.8120189905166626 + }, + { + "val_dataset": "refcoco+|unc|val", + "epoch": 20.0, + "eval_giou": 0.7863824367523193, + "eval_ciou": 0.7759678959846497 + }, + { + "val_dataset": "refcoco+|unc|testA", + "epoch": 20.0, + "eval_giou": 0.8180026412010193, + "eval_ciou": 0.8148158192634583 + }, + { + "val_dataset": "refcoco+|unc|testB", + "epoch": 20.0, + "eval_giou": 0.7562121748924255, + "eval_ciou": 0.7416884899139404 + }, + { + "val_dataset": "refcocog|umd|test", + "epoch": 20.0, + "eval_giou": 0.7904837131500244, + "eval_ciou": 0.799082338809967 + }, + { + "val_dataset": "refcocog|umd|val", + "epoch": 20.0, + "eval_giou": 0.7864062190055847, + "eval_ciou": 0.7907365560531616 + }, + { + "val_dataset": "grefcoco|unc|val", + "epoch": 20.0, + "eval_giou": 0.3480578660964966, + "eval_ciou": 0.40478312969207764 + } +] \ No newline at end of file diff --git a/lisa-ivl3-2b_ss2_2_ce_aa_sr4/events.out.tfevents.1759803265.bask-pg0309u36a.301557.0 b/lisa-ivl3-2b_ss2_2_ce_aa_sr4/events.out.tfevents.1759803265.bask-pg0309u36a.301557.0 new file mode 100644 index 0000000000000000000000000000000000000000..d4e808e5b876a4cba58788cdfc21072b3ed319c6 --- /dev/null +++ b/lisa-ivl3-2b_ss2_2_ce_aa_sr4/events.out.tfevents.1759803265.bask-pg0309u36a.301557.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:660498ff574c492bf5d679c2ab2000082d9d54adb74cd54a33d31ee5e50ed85d +size 420319 diff --git a/lisa-ivl3-2b_ss2_2_ce_aa_sr4/runs/Oct07_03-14-21_bask-pg0309u36a/events.out.tfevents.1759803317.bask-pg0309u36a.301557.1 b/lisa-ivl3-2b_ss2_2_ce_aa_sr4/runs/Oct07_03-14-21_bask-pg0309u36a/events.out.tfevents.1759803317.bask-pg0309u36a.301557.1 new file mode 100644 index 0000000000000000000000000000000000000000..aaf05956a23918ad9863d787a0b686674a40a560 --- /dev/null +++ b/lisa-ivl3-2b_ss2_2_ce_aa_sr4/runs/Oct07_03-14-21_bask-pg0309u36a/events.out.tfevents.1759803317.bask-pg0309u36a.301557.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07c9a2e3d5e655d1a3720f020272d78ce223c539dd74f22c129512a6a9c19465 +size 223365 diff --git a/lisa-ivl3-2b_ss2_2_ce_aa_sr4/runs/Oct07_03-14-21_bask-pg0309u36a/events.out.tfevents.1759893368.bask-pg0309u36a.301557.2 b/lisa-ivl3-2b_ss2_2_ce_aa_sr4/runs/Oct07_03-14-21_bask-pg0309u36a/events.out.tfevents.1759893368.bask-pg0309u36a.301557.2 new file mode 100644 index 0000000000000000000000000000000000000000..046c5dd2eabbb6486ee31347d6666aa7386cfbd4 --- /dev/null +++ b/lisa-ivl3-2b_ss2_2_ce_aa_sr4/runs/Oct07_03-14-21_bask-pg0309u36a/events.out.tfevents.1759893368.bask-pg0309u36a.301557.2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9651efafd9183c3de60fe034fc2361230008f6a877418be63c966d679c72114e +size 1548 diff --git a/lisa-ivl3-2b_ss2_2_ce_vlora_sr/ckpt_model/config.json b/lisa-ivl3-2b_ss2_2_ce_vlora_sr/ckpt_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1aaa0efe9e975d906488d234e8f6ec309c18056c --- /dev/null +++ b/lisa-ivl3-2b_ss2_2_ce_vlora_sr/ckpt_model/config.json @@ -0,0 +1,143 @@ +{ + "architectures": [ + "InternVL3Self" + ], + "auto_map": { + "AutoConfig": "configuration_internvl_chat.InternVLChatConfig", + "AutoModel": "modeling_internvl_chat.InternVLChatModel", + "AutoModelForCausalLM": "modeling_internvl_chat.InternVLChatModel" + }, + "downsample_ratio": 0.5, + "dtype": "bfloat16", + "dynamic_image_size": true, + "eos_token_id": 151645, + "force_image_size": 448, + "hidden_size": 1536, + "image_fold": null, + "llm_config": { + "_attn_implementation_autoset": true, + "_name_or_path": "./pretrained/Qwen2.5-32B-Instruct", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 151643, + "dtype": "bfloat16", + "eos_token_id": 151643, + "hidden_act": "silu", + "hidden_size": 1536, + "initializer_range": 0.02, + "intermediate_size": 8960, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 70, + "model_type": "qwen2", + "moe_config": null, + "num_attention_heads": 12, + "num_hidden_layers": 28, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 2.0, + "rope_type": "dynamic", + "type": "dynamic" + }, + "rope_theta": 1000000.0, + "sliding_window": null, + "use_bfloat16": true, + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 151676 + }, + "max_dynamic_patch": 12, + "min_dynamic_patch": 1, + "model_type": "internvl_chat", + "output_attentions": false, + "pad2square": false, + "pad_token_id": 151643, + "ps_version": "v2", + "select_layer": -1, + "system_message": null, + "template": "internvl2_5", + "tie_word_embeddings": false, + "transformers_version": null, + "use_backbone_lora": 0, + "use_llm_lora": 0, + "use_thumbnail": true, + "vision_config": { + "_attn_implementation_autoset": true, + "_name_or_path": "OpenGVLab/InternViT-6B-448px-V1-5", + "architectures": [ + "InternVisionModel" + ], + "attention_dropout": 0.0, + "auto_map": { + "AutoConfig": "configuration_intern_vit.InternVisionConfig", + "AutoModel": "modeling_intern_vit.InternVisionModel" + }, + "capacity_factor": 1.2, + "drop_path_rate": 0.1, + "dropout": 0.0, + "dtype": "bfloat16", + "eval_capacity_factor": 1.4, + "hidden_act": "gelu", + "hidden_size": 1024, + "image_size": 448, + "initializer_factor": 0.1, + "initializer_range": 1e-10, + "intermediate_size": 4096, + "laux_allreduce": "all_nodes", + "layer_norm_eps": 1e-06, + "model_type": "intern_vit_6b", + "moe_coeff_ratio": 0.5, + "moe_intermediate_size": 768, + "moe_output_scale": 4.0, + "noisy_gate_policy": "RSample_before", + "norm_type": "layer_norm", + "num_attention_heads": 16, + "num_channels": 3, + "num_experts": 8, + "num_hidden_layers": 24, + "num_routed_experts": 4, + "num_shared_experts": 4, + "patch_size": 14, + "qk_normalization": false, + "qkv_bias": true, + "shared_expert_intermediate_size": 3072, + "use_bfloat16": true, + "use_flash_attn": true, + "use_moe": false, + "use_residual": true, + "use_rts": false, + "use_weighted_residual": false + } +} diff --git a/lisa-ivl3-2b_ss2_2_ce_vlora_sr/ckpt_model/model.safetensors b/lisa-ivl3-2b_ss2_2_ce_vlora_sr/ckpt_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c9c423a123c588643fccc69e7c7dab3252bb7e9c --- /dev/null +++ b/lisa-ivl3-2b_ss2_2_ce_vlora_sr/ckpt_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:139fdbe8833d72b40b2877084cee6be809601e8730f050f271995f16007a95f4 +size 4234672656 diff --git a/lisa-ivl3-2b_ss2_2_ce_vlora_sr/ckpt_model/training_args.bin b/lisa-ivl3-2b_ss2_2_ce_vlora_sr/ckpt_model/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..70ba98e1927482ba284c7bc613645b0458b8fbf6 --- /dev/null +++ b/lisa-ivl3-2b_ss2_2_ce_vlora_sr/ckpt_model/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7aaa7c7f7fcd6affd89f09fc49b3e6065ac81102006c76127b26eb69297651b1 +size 7352 diff --git a/lisa-ivl3-2b_ss2_2_ce_vlora_sr/evaluation_metrics.json b/lisa-ivl3-2b_ss2_2_ce_vlora_sr/evaluation_metrics.json new file mode 100644 index 0000000000000000000000000000000000000000..aafa2e6de48548c0459fb84c184a89f4a5e62122 --- /dev/null +++ b/lisa-ivl3-2b_ss2_2_ce_vlora_sr/evaluation_metrics.json @@ -0,0 +1,134 @@ +[ + { + "val_dataset": "ReasonSeg|val", + "epoch": 1.0, + "eval_giou": 0.5569912195205688, + "eval_ciou": 0.6086179614067078 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 2.0, + "eval_giou": 0.5633984208106995, + "eval_ciou": 0.6330024600028992 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 3.0, + "eval_giou": 0.5652901530265808, + "eval_ciou": 0.6073711514472961 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 4.0, + "eval_giou": 0.5960856080055237, + "eval_ciou": 0.6793828010559082 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 5.0, + "eval_giou": 0.6029606461524963, + "eval_ciou": 0.7154921293258667 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 6.0, + "eval_giou": 0.5903835892677307, + "eval_ciou": 0.569995105266571 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 7.0, + "eval_giou": 0.613521158695221, + "eval_ciou": 0.6969940066337585 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 8.0, + "eval_giou": 0.6117329597473145, + "eval_ciou": 0.6676673889160156 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 9.0, + "eval_giou": 0.6173275113105774, + "eval_ciou": 0.6789456009864807 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 10.0, + "eval_giou": 0.6149329543113708, + "eval_ciou": 0.6718701124191284 + }, + { + "val_dataset": "ReasonSeg|test", + "epoch": 10.0, + "eval_giou": 0.609404444694519, + "eval_ciou": 0.6132881045341492 + }, + { + "val_dataset": "refcoco|unc|val", + "epoch": 10.0, + "eval_giou": 0.7893159985542297, + "eval_ciou": 0.7933170199394226 + }, + { + "val_dataset": "refcoco|unc|testA", + "epoch": 10.0, + "eval_giou": 0.8105526566505432, + "eval_ciou": 0.8163524270057678 + }, + { + "val_dataset": "refcoco|unc|testB", + "epoch": 10.0, + "eval_giou": 0.7612078785896301, + "eval_ciou": 0.7619940042495728 + }, + { + "val_dataset": "refcoco+|unc|val", + "epoch": 10.0, + "eval_giou": 0.7366273999214172, + "eval_ciou": 0.7300769686698914 + }, + { + "val_dataset": "refcoco+|unc|testA", + "epoch": 10.0, + "eval_giou": 0.7783792614936829, + "eval_ciou": 0.7807698845863342 + }, + { + "val_dataset": "refcoco+|unc|testB", + "epoch": 10.0, + "eval_giou": 0.6872988343238831, + "eval_ciou": 0.6766538619995117 + }, + { + "val_dataset": "refcocog|umd|test", + "epoch": 10.0, + "eval_giou": 0.7511187195777893, + "eval_ciou": 0.7614018321037292 + }, + { + "val_dataset": "refcocog|umd|val", + "epoch": 10.0, + "eval_giou": 0.7464777231216431, + "eval_ciou": 0.7518000602722168 + }, + { + "val_dataset": "grefcoco|unc|val", + "epoch": 10.0, + "eval_giou": 0.34809717535972595, + "eval_ciou": 0.40297430753707886 + }, + { + "val_dataset": "grefcoco|unc|testA", + "epoch": 10.0, + "eval_giou": 0.5119317770004272, + "eval_ciou": 0.5438946485519409 + }, + { + "val_dataset": "grefcoco|unc|testB", + "epoch": 10.0, + "eval_giou": 0.42917221784591675, + "eval_ciou": 0.46293896436691284 + } +] \ No newline at end of file diff --git a/lisa-ivl3-2b_ss2_2_ce_vlora_sr/events.out.tfevents.1759802625.bask-pg0309u03a.1084672.0 b/lisa-ivl3-2b_ss2_2_ce_vlora_sr/events.out.tfevents.1759802625.bask-pg0309u03a.1084672.0 new file mode 100644 index 0000000000000000000000000000000000000000..610cc23e9b6d08a0fac5f243e0a5783a3928a573 --- /dev/null +++ b/lisa-ivl3-2b_ss2_2_ce_vlora_sr/events.out.tfevents.1759802625.bask-pg0309u03a.1084672.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a500c6eab72e54c5f9f22ebdab934bd6697b3029ccac498ad538fbad5ec125c8 +size 213843 diff --git a/lisa-ivl3-2b_ss2_2_ce_vlora_sr/events.out.tfevents.1759802958.bask-pg0309u36a.294001.0 b/lisa-ivl3-2b_ss2_2_ce_vlora_sr/events.out.tfevents.1759802958.bask-pg0309u36a.294001.0 new file mode 100644 index 0000000000000000000000000000000000000000..da1c37fd8b80c55bd4aea126489bc9b84271e1e9 --- /dev/null +++ b/lisa-ivl3-2b_ss2_2_ce_vlora_sr/events.out.tfevents.1759802958.bask-pg0309u36a.294001.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8104cc4761ea3f319ab73a87e86d1bacfacc2e1d101f9e6144d48e9575589b0 +size 884 diff --git a/lisa-ivl3-2b_ss2_2_ce_vlora_sr/runs/Oct07_03-03-41_bask-pg0309u03a/events.out.tfevents.1759802711.bask-pg0309u03a.1084672.1 b/lisa-ivl3-2b_ss2_2_ce_vlora_sr/runs/Oct07_03-03-41_bask-pg0309u03a/events.out.tfevents.1759802711.bask-pg0309u03a.1084672.1 new file mode 100644 index 0000000000000000000000000000000000000000..745c7295e89f2470010f5dbc67cd3b49472f37cb --- /dev/null +++ b/lisa-ivl3-2b_ss2_2_ce_vlora_sr/runs/Oct07_03-03-41_bask-pg0309u03a/events.out.tfevents.1759802711.bask-pg0309u03a.1084672.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfeb499369338a583b905c941c5ff69aeca1089f1472802e723f637df76850b5 +size 116410 diff --git a/lisa-ivl3-2b_ss2_2_ce_vlora_sr/runs/Oct07_03-03-41_bask-pg0309u03a/events.out.tfevents.1759828312.bask-pg0309u03a.1084672.2 b/lisa-ivl3-2b_ss2_2_ce_vlora_sr/runs/Oct07_03-03-41_bask-pg0309u03a/events.out.tfevents.1759828312.bask-pg0309u03a.1084672.2 new file mode 100644 index 0000000000000000000000000000000000000000..d8a1eae9d17795df936a05a5618f9b4107b16f8b --- /dev/null +++ b/lisa-ivl3-2b_ss2_2_ce_vlora_sr/runs/Oct07_03-03-41_bask-pg0309u03a/events.out.tfevents.1759828312.bask-pg0309u03a.1084672.2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e633518d904410cce35cf9d1d12a7f90a98cfefaed38a2c49899a62fd2eb82e5 +size 1840 diff --git a/lisa-ivl3-2b_ss2_2_ce_vlora_sr/runs/Oct07_03-09-14_bask-pg0309u36a/events.out.tfevents.1759803036.bask-pg0309u36a.294001.1 b/lisa-ivl3-2b_ss2_2_ce_vlora_sr/runs/Oct07_03-09-14_bask-pg0309u36a/events.out.tfevents.1759803036.bask-pg0309u36a.294001.1 new file mode 100644 index 0000000000000000000000000000000000000000..5ebe7921221d2cd6d11dede8e38ef7d323e877c9 --- /dev/null +++ b/lisa-ivl3-2b_ss2_2_ce_vlora_sr/runs/Oct07_03-09-14_bask-pg0309u36a/events.out.tfevents.1759803036.bask-pg0309u36a.294001.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f2d39d511228f1b70a936620c76696edf7c38ac0920ae3adf4e6e0ecfff6fec +size 9558 diff --git a/lisa-ivl3-2b_ss2_2_ce_vlora_sr_cbs/ckpt_model/config.json b/lisa-ivl3-2b_ss2_2_ce_vlora_sr_cbs/ckpt_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1aaa0efe9e975d906488d234e8f6ec309c18056c --- /dev/null +++ b/lisa-ivl3-2b_ss2_2_ce_vlora_sr_cbs/ckpt_model/config.json @@ -0,0 +1,143 @@ +{ + "architectures": [ + "InternVL3Self" + ], + "auto_map": { + "AutoConfig": "configuration_internvl_chat.InternVLChatConfig", + "AutoModel": "modeling_internvl_chat.InternVLChatModel", + "AutoModelForCausalLM": "modeling_internvl_chat.InternVLChatModel" + }, + "downsample_ratio": 0.5, + "dtype": "bfloat16", + "dynamic_image_size": true, + "eos_token_id": 151645, + "force_image_size": 448, + "hidden_size": 1536, + "image_fold": null, + "llm_config": { + "_attn_implementation_autoset": true, + "_name_or_path": "./pretrained/Qwen2.5-32B-Instruct", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 151643, + "dtype": "bfloat16", + "eos_token_id": 151643, + "hidden_act": "silu", + "hidden_size": 1536, + "initializer_range": 0.02, + "intermediate_size": 8960, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 70, + "model_type": "qwen2", + "moe_config": null, + "num_attention_heads": 12, + "num_hidden_layers": 28, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 2.0, + "rope_type": "dynamic", + "type": "dynamic" + }, + "rope_theta": 1000000.0, + "sliding_window": null, + "use_bfloat16": true, + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 151676 + }, + "max_dynamic_patch": 12, + "min_dynamic_patch": 1, + "model_type": "internvl_chat", + "output_attentions": false, + "pad2square": false, + "pad_token_id": 151643, + "ps_version": "v2", + "select_layer": -1, + "system_message": null, + "template": "internvl2_5", + "tie_word_embeddings": false, + "transformers_version": null, + "use_backbone_lora": 0, + "use_llm_lora": 0, + "use_thumbnail": true, + "vision_config": { + "_attn_implementation_autoset": true, + "_name_or_path": "OpenGVLab/InternViT-6B-448px-V1-5", + "architectures": [ + "InternVisionModel" + ], + "attention_dropout": 0.0, + "auto_map": { + "AutoConfig": "configuration_intern_vit.InternVisionConfig", + "AutoModel": "modeling_intern_vit.InternVisionModel" + }, + "capacity_factor": 1.2, + "drop_path_rate": 0.1, + "dropout": 0.0, + "dtype": "bfloat16", + "eval_capacity_factor": 1.4, + "hidden_act": "gelu", + "hidden_size": 1024, + "image_size": 448, + "initializer_factor": 0.1, + "initializer_range": 1e-10, + "intermediate_size": 4096, + "laux_allreduce": "all_nodes", + "layer_norm_eps": 1e-06, + "model_type": "intern_vit_6b", + "moe_coeff_ratio": 0.5, + "moe_intermediate_size": 768, + "moe_output_scale": 4.0, + "noisy_gate_policy": "RSample_before", + "norm_type": "layer_norm", + "num_attention_heads": 16, + "num_channels": 3, + "num_experts": 8, + "num_hidden_layers": 24, + "num_routed_experts": 4, + "num_shared_experts": 4, + "patch_size": 14, + "qk_normalization": false, + "qkv_bias": true, + "shared_expert_intermediate_size": 3072, + "use_bfloat16": true, + "use_flash_attn": true, + "use_moe": false, + "use_residual": true, + "use_rts": false, + "use_weighted_residual": false + } +} diff --git a/lisa-ivl3-2b_ss2_2_ce_vlora_sr_cbs/ckpt_model/model.safetensors b/lisa-ivl3-2b_ss2_2_ce_vlora_sr_cbs/ckpt_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..508ca4a6339b88bb614019804a670a09d12b214c --- /dev/null +++ b/lisa-ivl3-2b_ss2_2_ce_vlora_sr_cbs/ckpt_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ded542864849353bdceb134e69a3b5a4fc9b80b59cd2a24af856083bf716a8d +size 4234672656 diff --git a/lisa-ivl3-2b_ss2_2_ce_vlora_sr_cbs/ckpt_model/training_args.bin b/lisa-ivl3-2b_ss2_2_ce_vlora_sr_cbs/ckpt_model/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..03ac26681722131b92df65a227f342817dd43643 --- /dev/null +++ b/lisa-ivl3-2b_ss2_2_ce_vlora_sr_cbs/ckpt_model/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7646b28e1a972840c74f3a2084a03d217bbdb53bac4e767a518a2fe11832dbf3 +size 7416 diff --git a/lisa-ivl3-2b_ss2_2_ce_vlora_sr_cbs/evaluation_metrics.json b/lisa-ivl3-2b_ss2_2_ce_vlora_sr_cbs/evaluation_metrics.json new file mode 100644 index 0000000000000000000000000000000000000000..d485a0bcafa659deaa435c4ee94b829772c7426d --- /dev/null +++ b/lisa-ivl3-2b_ss2_2_ce_vlora_sr_cbs/evaluation_metrics.json @@ -0,0 +1,122 @@ +[ + { + "val_dataset": "ReasonSeg|val", + "epoch": 1.0, + "eval_giou": 0.5527857542037964, + "eval_ciou": 0.5932283997535706 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 2.0, + "eval_giou": 0.5824025869369507, + "eval_ciou": 0.6431398391723633 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 3.0, + "eval_giou": 0.5880689024925232, + "eval_ciou": 0.648893415927887 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 4.0, + "eval_giou": 0.5946951508522034, + "eval_ciou": 0.5710168480873108 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 5.0, + "eval_giou": 0.607143223285675, + "eval_ciou": 0.6807403564453125 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 6.0, + "eval_giou": 0.6211094856262207, + "eval_ciou": 0.6658480167388916 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 7.0, + "eval_giou": 0.6222315430641174, + "eval_ciou": 0.6929301023483276 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 8.0, + "eval_giou": 0.6269657611846924, + "eval_ciou": 0.7312238812446594 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 9.0, + "eval_giou": 0.6310562491416931, + "eval_ciou": 0.7098237872123718 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 10.0, + "eval_giou": 0.6383457779884338, + "eval_ciou": 0.6986754536628723 + }, + { + "val_dataset": "ReasonSeg|test", + "epoch": 10.0, + "eval_giou": 0.6070990562438965, + "eval_ciou": 0.6333158016204834 + }, + { + "val_dataset": "refcoco|unc|val", + "epoch": 10.0, + "eval_giou": 0.7885053753852844, + "eval_ciou": 0.7932251691818237 + }, + { + "val_dataset": "refcoco|unc|testA", + "epoch": 10.0, + "eval_giou": 0.8098193407058716, + "eval_ciou": 0.8152725100517273 + }, + { + "val_dataset": "refcoco|unc|testB", + "epoch": 10.0, + "eval_giou": 0.7649039030075073, + "eval_ciou": 0.7621927857398987 + }, + { + "val_dataset": "refcoco+|unc|val", + "epoch": 10.0, + "eval_giou": 0.7370991706848145, + "eval_ciou": 0.7327128052711487 + }, + { + "val_dataset": "refcoco+|unc|testA", + "epoch": 10.0, + "eval_giou": 0.78033047914505, + "eval_ciou": 0.778212308883667 + }, + { + "val_dataset": "refcoco+|unc|testB", + "epoch": 10.0, + "eval_giou": 0.6966450810432434, + "eval_ciou": 0.6818153858184814 + }, + { + "val_dataset": "refcocog|umd|test", + "epoch": 10.0, + "eval_giou": 0.7497442960739136, + "eval_ciou": 0.7584661245346069 + }, + { + "val_dataset": "refcocog|umd|val", + "epoch": 10.0, + "eval_giou": 0.7498595118522644, + "eval_ciou": 0.7527892589569092 + }, + { + "val_dataset": "grefcoco|unc|val", + "epoch": 10.0, + "eval_giou": 0.3540762960910797, + "eval_ciou": 0.41344839334487915 + } +] \ No newline at end of file diff --git a/lisa-ivl3-2b_ss2_2_ce_vlora_sr_cbs/events.out.tfevents.1759861263.bask-pg0309u06a.4175316.0 b/lisa-ivl3-2b_ss2_2_ce_vlora_sr_cbs/events.out.tfevents.1759861263.bask-pg0309u06a.4175316.0 new file mode 100644 index 0000000000000000000000000000000000000000..bbb498660eafdaaa8a12e8b0d14b3b37ed83cbcc --- /dev/null +++ b/lisa-ivl3-2b_ss2_2_ce_vlora_sr_cbs/events.out.tfevents.1759861263.bask-pg0309u06a.4175316.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a297ea94a49a600172e339e28a45fc77aebaf73308a38d43f21a9e5ac079f3b0 +size 212849 diff --git a/lisa-ivl3-2b_ss2_2_ce_vlora_sr_cbs/runs/Oct07_19-21-00_bask-pg0309u06a/events.out.tfevents.1759861336.bask-pg0309u06a.4175316.1 b/lisa-ivl3-2b_ss2_2_ce_vlora_sr_cbs/runs/Oct07_19-21-00_bask-pg0309u06a/events.out.tfevents.1759861336.bask-pg0309u06a.4175316.1 new file mode 100644 index 0000000000000000000000000000000000000000..241bbdc020d4d915ea6ce30cfee3114031eb33e7 --- /dev/null +++ b/lisa-ivl3-2b_ss2_2_ce_vlora_sr_cbs/runs/Oct07_19-21-00_bask-pg0309u06a/events.out.tfevents.1759861336.bask-pg0309u06a.4175316.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39e43e0e16ad8cd8303aa3aa63a0ef0e030657eb94a17484ea8b4316f473723e +size 116421 diff --git a/lisa-ivl3-2b_ss2_2_ce_vlora_sr_cbs/runs/Oct07_19-21-00_bask-pg0309u06a/events.out.tfevents.1759909408.bask-pg0309u06a.4175316.2 b/lisa-ivl3-2b_ss2_2_ce_vlora_sr_cbs/runs/Oct07_19-21-00_bask-pg0309u06a/events.out.tfevents.1759909408.bask-pg0309u06a.4175316.2 new file mode 100644 index 0000000000000000000000000000000000000000..9980f3745ac8d9a20406c6c8872af043702e408e --- /dev/null +++ b/lisa-ivl3-2b_ss2_2_ce_vlora_sr_cbs/runs/Oct07_19-21-00_bask-pg0309u06a/events.out.tfevents.1759909408.bask-pg0309u06a.4175316.2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0d168a0a440658ae28c1010517212ac041db4fa3c50e1aac528b1c72bc4e0fd +size 1548 diff --git a/lisa-ivl3-2b_ss2_2_vlora_sr_cbs/ckpt_model/config.json b/lisa-ivl3-2b_ss2_2_vlora_sr_cbs/ckpt_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1aaa0efe9e975d906488d234e8f6ec309c18056c --- /dev/null +++ b/lisa-ivl3-2b_ss2_2_vlora_sr_cbs/ckpt_model/config.json @@ -0,0 +1,143 @@ +{ + "architectures": [ + "InternVL3Self" + ], + "auto_map": { + "AutoConfig": "configuration_internvl_chat.InternVLChatConfig", + "AutoModel": "modeling_internvl_chat.InternVLChatModel", + "AutoModelForCausalLM": "modeling_internvl_chat.InternVLChatModel" + }, + "downsample_ratio": 0.5, + "dtype": "bfloat16", + "dynamic_image_size": true, + "eos_token_id": 151645, + "force_image_size": 448, + "hidden_size": 1536, + "image_fold": null, + "llm_config": { + "_attn_implementation_autoset": true, + "_name_or_path": "./pretrained/Qwen2.5-32B-Instruct", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 151643, + "dtype": "bfloat16", + "eos_token_id": 151643, + "hidden_act": "silu", + "hidden_size": 1536, + "initializer_range": 0.02, + "intermediate_size": 8960, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 70, + "model_type": "qwen2", + "moe_config": null, + "num_attention_heads": 12, + "num_hidden_layers": 28, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 2.0, + "rope_type": "dynamic", + "type": "dynamic" + }, + "rope_theta": 1000000.0, + "sliding_window": null, + "use_bfloat16": true, + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 151676 + }, + "max_dynamic_patch": 12, + "min_dynamic_patch": 1, + "model_type": "internvl_chat", + "output_attentions": false, + "pad2square": false, + "pad_token_id": 151643, + "ps_version": "v2", + "select_layer": -1, + "system_message": null, + "template": "internvl2_5", + "tie_word_embeddings": false, + "transformers_version": null, + "use_backbone_lora": 0, + "use_llm_lora": 0, + "use_thumbnail": true, + "vision_config": { + "_attn_implementation_autoset": true, + "_name_or_path": "OpenGVLab/InternViT-6B-448px-V1-5", + "architectures": [ + "InternVisionModel" + ], + "attention_dropout": 0.0, + "auto_map": { + "AutoConfig": "configuration_intern_vit.InternVisionConfig", + "AutoModel": "modeling_intern_vit.InternVisionModel" + }, + "capacity_factor": 1.2, + "drop_path_rate": 0.1, + "dropout": 0.0, + "dtype": "bfloat16", + "eval_capacity_factor": 1.4, + "hidden_act": "gelu", + "hidden_size": 1024, + "image_size": 448, + "initializer_factor": 0.1, + "initializer_range": 1e-10, + "intermediate_size": 4096, + "laux_allreduce": "all_nodes", + "layer_norm_eps": 1e-06, + "model_type": "intern_vit_6b", + "moe_coeff_ratio": 0.5, + "moe_intermediate_size": 768, + "moe_output_scale": 4.0, + "noisy_gate_policy": "RSample_before", + "norm_type": "layer_norm", + "num_attention_heads": 16, + "num_channels": 3, + "num_experts": 8, + "num_hidden_layers": 24, + "num_routed_experts": 4, + "num_shared_experts": 4, + "patch_size": 14, + "qk_normalization": false, + "qkv_bias": true, + "shared_expert_intermediate_size": 3072, + "use_bfloat16": true, + "use_flash_attn": true, + "use_moe": false, + "use_residual": true, + "use_rts": false, + "use_weighted_residual": false + } +} diff --git a/lisa-ivl3-2b_ss2_2_vlora_sr_cbs/ckpt_model/model.safetensors b/lisa-ivl3-2b_ss2_2_vlora_sr_cbs/ckpt_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0cfb44b242f66d96a037207a999110ad08605c2d --- /dev/null +++ b/lisa-ivl3-2b_ss2_2_vlora_sr_cbs/ckpt_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f04cd6727c00b2742cf129f2eb08f2c1d5b87af0217ac19dc6376d887f025fb +size 4234672656 diff --git a/lisa-ivl3-2b_ss2_2_vlora_sr_cbs/ckpt_model/training_args.bin b/lisa-ivl3-2b_ss2_2_vlora_sr_cbs/ckpt_model/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..70ddebf0ef01f48ebebb493d4a393db005d59831 --- /dev/null +++ b/lisa-ivl3-2b_ss2_2_vlora_sr_cbs/ckpt_model/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68066f712336f0fdf020892265cda80625c0e52fa04cc5e88931824712ee8acc +size 7416 diff --git a/lisa-ivl3-2b_ss2_2_vlora_sr_cbs/evaluation_metrics.json b/lisa-ivl3-2b_ss2_2_vlora_sr_cbs/evaluation_metrics.json new file mode 100644 index 0000000000000000000000000000000000000000..c5b8d7a7b91b1653a8f5be88e1d62bbb34939c87 --- /dev/null +++ b/lisa-ivl3-2b_ss2_2_vlora_sr_cbs/evaluation_metrics.json @@ -0,0 +1,122 @@ +[ + { + "val_dataset": "ReasonSeg|val", + "epoch": 1.0, + "eval_giou": 0.5277431011199951, + "eval_ciou": 0.6159834861755371 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 2.0, + "eval_giou": 0.5695093870162964, + "eval_ciou": 0.656923234462738 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 3.0, + "eval_giou": 0.5628752708435059, + "eval_ciou": 0.5984412431716919 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 4.0, + "eval_giou": 0.5832757949829102, + "eval_ciou": 0.6542620062828064 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 5.0, + "eval_giou": 0.5863526463508606, + "eval_ciou": 0.672465443611145 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 6.0, + "eval_giou": 0.6079390048980713, + "eval_ciou": 0.6273269653320312 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 7.0, + "eval_giou": 0.6149767637252808, + "eval_ciou": 0.7198311686515808 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 8.0, + "eval_giou": 0.6183528900146484, + "eval_ciou": 0.7168822288513184 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 9.0, + "eval_giou": 0.6216787099838257, + "eval_ciou": 0.6504685878753662 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 10.0, + "eval_giou": 0.628157377243042, + "eval_ciou": 0.6502699851989746 + }, + { + "val_dataset": "ReasonSeg|test", + "epoch": 10.0, + "eval_giou": 0.6069071292877197, + "eval_ciou": 0.6400895118713379 + }, + { + "val_dataset": "refcoco|unc|val", + "epoch": 10.0, + "eval_giou": 0.7894263863563538, + "eval_ciou": 0.791189968585968 + }, + { + "val_dataset": "refcoco|unc|testA", + "epoch": 10.0, + "eval_giou": 0.8096492290496826, + "eval_ciou": 0.8152292370796204 + }, + { + "val_dataset": "refcoco|unc|testB", + "epoch": 10.0, + "eval_giou": 0.7624980211257935, + "eval_ciou": 0.761019766330719 + }, + { + "val_dataset": "refcoco+|unc|val", + "epoch": 10.0, + "eval_giou": 0.7381681203842163, + "eval_ciou": 0.7294842004776001 + }, + { + "val_dataset": "refcoco+|unc|testA", + "epoch": 10.0, + "eval_giou": 0.7807486653327942, + "eval_ciou": 0.7798734903335571 + }, + { + "val_dataset": "refcoco+|unc|testB", + "epoch": 10.0, + "eval_giou": 0.6928234100341797, + "eval_ciou": 0.674961268901825 + }, + { + "val_dataset": "refcocog|umd|test", + "epoch": 10.0, + "eval_giou": 0.7543120980262756, + "eval_ciou": 0.7628795504570007 + }, + { + "val_dataset": "refcocog|umd|val", + "epoch": 10.0, + "eval_giou": 0.7510992288589478, + "eval_ciou": 0.7527034282684326 + }, + { + "val_dataset": "grefcoco|unc|val", + "epoch": 10.0, + "eval_giou": 0.3497574031352997, + "eval_ciou": 0.4087769389152527 + } +] \ No newline at end of file diff --git a/lisa-ivl3-2b_ss2_2_vlora_sr_cbs/events.out.tfevents.1759860775.bask-pg0309u06a.4166459.0 b/lisa-ivl3-2b_ss2_2_vlora_sr_cbs/events.out.tfevents.1759860775.bask-pg0309u06a.4166459.0 new file mode 100644 index 0000000000000000000000000000000000000000..968019e75cd826189b76999be55b9619f93a2bcf --- /dev/null +++ b/lisa-ivl3-2b_ss2_2_vlora_sr_cbs/events.out.tfevents.1759860775.bask-pg0309u06a.4166459.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:541a9d112d7a5af910b42242c4f208c87af8c04886e58377a0dcc69564ff27b4 +size 1282 diff --git a/lisa-ivl3-2b_ss2_2_vlora_sr_cbs/events.out.tfevents.1759861376.bask-pg0309u25a.2947352.0 b/lisa-ivl3-2b_ss2_2_vlora_sr_cbs/events.out.tfevents.1759861376.bask-pg0309u25a.2947352.0 new file mode 100644 index 0000000000000000000000000000000000000000..8d1323f55b97b3fedf8b262716906017c3d88945 --- /dev/null +++ b/lisa-ivl3-2b_ss2_2_vlora_sr_cbs/events.out.tfevents.1759861376.bask-pg0309u25a.2947352.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7b570f194a853ddde5905546ad3d3d444332ac2fd4a68cff83334b26cc040bf +size 212849 diff --git a/lisa-ivl3-2b_ss2_2_vlora_sr_cbs/runs/Oct07_19-12-52_bask-pg0309u06a/events.out.tfevents.1759860859.bask-pg0309u06a.4166459.1 b/lisa-ivl3-2b_ss2_2_vlora_sr_cbs/runs/Oct07_19-12-52_bask-pg0309u06a/events.out.tfevents.1759860859.bask-pg0309u06a.4166459.1 new file mode 100644 index 0000000000000000000000000000000000000000..e1ed913c084e560e39bcce59c3c10128bb34ef24 --- /dev/null +++ b/lisa-ivl3-2b_ss2_2_vlora_sr_cbs/runs/Oct07_19-12-52_bask-pg0309u06a/events.out.tfevents.1759860859.bask-pg0309u06a.4166459.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2be3c540024b8d899936db36f9bffe7ef71c89584d71628545e7ecd071bb05a2 +size 9770 diff --git a/lisa-ivl3-2b_ss2_2_vlora_sr_cbs/runs/Oct07_19-22-53_bask-pg0309u25a/events.out.tfevents.1759861446.bask-pg0309u25a.2947352.1 b/lisa-ivl3-2b_ss2_2_vlora_sr_cbs/runs/Oct07_19-22-53_bask-pg0309u25a/events.out.tfevents.1759861446.bask-pg0309u25a.2947352.1 new file mode 100644 index 0000000000000000000000000000000000000000..36425fe2773643cd852e0f89e6b845928c4f95b5 --- /dev/null +++ b/lisa-ivl3-2b_ss2_2_vlora_sr_cbs/runs/Oct07_19-22-53_bask-pg0309u25a/events.out.tfevents.1759861446.bask-pg0309u25a.2947352.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b11d7815b996f9d45d5665b7aaceff356a68e202ba470480fd8c8dd22ab507d +size 116415 diff --git a/lisa-ivl3-2b_ss2_2_vlora_sr_cbs/runs/Oct07_19-22-53_bask-pg0309u25a/events.out.tfevents.1759908785.bask-pg0309u25a.2947352.2 b/lisa-ivl3-2b_ss2_2_vlora_sr_cbs/runs/Oct07_19-22-53_bask-pg0309u25a/events.out.tfevents.1759908785.bask-pg0309u25a.2947352.2 new file mode 100644 index 0000000000000000000000000000000000000000..ffc4c58680a74aec6f86a9cc2653cb50d662c546 --- /dev/null +++ b/lisa-ivl3-2b_ss2_2_vlora_sr_cbs/runs/Oct07_19-22-53_bask-pg0309u25a/events.out.tfevents.1759908785.bask-pg0309u25a.2947352.2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f73f8bfce71a20c6df0c1d87116c264f9a3871cd966fdb0e0c2f6c0e9e726fb5 +size 1548