Upload folder using huggingface_hub
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- lisa-ivl2-2b_aati_sr/ckpt_model/config.json +87 -0
- lisa-ivl2-2b_aati_sr/ckpt_model/model.safetensors +3 -0
- lisa-ivl2-2b_aati_sr/ckpt_model/training_args.bin +3 -0
- lisa-ivl2-2b_aati_sr/evaluation_metrics.json +176 -0
- lisa-ivl2-2b_aati_sr/events.out.tfevents.1759176485.bask-pg0308u25a.324870.0 +3 -0
- lisa-ivl2-2b_aati_sr/events.out.tfevents.1759176613.bask-pg0308u25a.335258.0 +3 -0
- lisa-ivl2-2b_aati_sr/runs/Sep29_21-10-10_bask-pg0308u25a/events.out.tfevents.1759176696.bask-pg0308u25a.335258.1 +3 -0
- lisa-ivl2-2b_aati_sr/runs/Sep29_21-10-10_bask-pg0308u25a/events.out.tfevents.1759226831.bask-pg0308u25a.335258.2 +3 -0
- lisa-ivl3-2b_aati_sr/ckpt_model/config.json +143 -0
- lisa-ivl3-2b_aati_sr/ckpt_model/model.safetensors +3 -0
- lisa-ivl3-2b_aati_sr/ckpt_model/training_args.bin +3 -0
- lisa-ivl3-2b_aati_sr/evaluation_metrics.json +182 -0
- lisa-ivl3-2b_aati_sr/events.out.tfevents.1759004643.bask-pg0308u25a.2921884.0 +3 -0
- lisa-ivl3-2b_aati_sr/events.out.tfevents.1759097339.bask-pg0308u25a.2052782.0 +3 -0
- lisa-ivl3-2b_aati_sr/events.out.tfevents.1759097469.bask-pg0308u25a.2060658.0 +3 -0
- lisa-ivl3-2b_aati_sr/runs/Sep27_21-24-01_bask-pg0308u25a/events.out.tfevents.1759004710.bask-pg0308u25a.2921884.1 +3 -0
- lisa-ivl3-2b_aati_sr/runs/Sep28_23-08-55_bask-pg0308u25a/events.out.tfevents.1759097412.bask-pg0308u25a.2052782.1 +3 -0
- lisa-ivl3-2b_aati_sr/runs/Sep28_23-11-05_bask-pg0308u25a/events.out.tfevents.1759097532.bask-pg0308u25a.2060658.1 +3 -0
- lisa-ivl3-2b_aati_sr/runs/Sep28_23-11-05_bask-pg0308u25a/events.out.tfevents.1759137768.bask-pg0308u25a.2060658.2 +3 -0
- lisa-ivl3-2b_aati_sr_bs5acu8e20/ckpt_model/config.json +143 -0
- lisa-ivl3-2b_aati_sr_bs5acu8e20/ckpt_model/model.safetensors +3 -0
- lisa-ivl3-2b_aati_sr_bs5acu8e20/ckpt_model/training_args.bin +3 -0
- lisa-ivl3-2b_aati_sr_bs5acu8e20/evaluation_metrics.json +176 -0
- lisa-ivl3-2b_aati_sr_bs5acu8e20/events.out.tfevents.1759318805.bask-pg0309u15a.1769939.0 +3 -0
- lisa-ivl3-2b_aati_sr_bs5acu8e20/runs/Oct01_12-39-59_bask-pg0309u15a/events.out.tfevents.1759318879.bask-pg0309u15a.1769939.1 +3 -0
- lisa-ivl3-2b_aati_sr_bs5acu8e20/runs/Oct01_12-39-59_bask-pg0309u15a/events.out.tfevents.1759401820.bask-pg0309u15a.1769939.2 +3 -0
- lisa-ivl3-2b_nr2_vlorati_sr/ckpt_model/config.json +143 -0
- lisa-ivl3-2b_nr2_vlorati_sr/ckpt_model/model.safetensors +3 -0
- lisa-ivl3-2b_nr2_vlorati_sr/ckpt_model/training_args.bin +3 -0
- lisa-ivl3-2b_nr2_vlorati_sr/evaluation_metrics.json +116 -0
- lisa-ivl3-2b_nr2_vlorati_sr/events.out.tfevents.1759275794.bask-pg0309u06a.3124946.0 +3 -0
- lisa-ivl3-2b_nr2_vlorati_sr/runs/Oct01_00-43-11_bask-pg0309u06a/events.out.tfevents.1759275884.bask-pg0309u06a.3124946.1 +3 -0
- lisa-ivl3-2b_nr2_vlorati_sr/runs/Oct01_00-43-11_bask-pg0309u06a/events.out.tfevents.1759322723.bask-pg0309u06a.3124946.2 +3 -0
- lisa-ivl3-2b_nr3_122_2_vlorati_sr/ckpt_model/config.json +143 -0
- lisa-ivl3-2b_nr3_122_2_vlorati_sr/ckpt_model/model.safetensors +3 -0
- lisa-ivl3-2b_nr3_122_2_vlorati_sr/ckpt_model/training_args.bin +3 -0
- lisa-ivl3-2b_nr3_122_2_vlorati_sr/evaluation_metrics.json +116 -0
- lisa-ivl3-2b_nr3_122_2_vlorati_sr/events.out.tfevents.1759337142.bask-pg0308u25a.3571287.0 +3 -0
- lisa-ivl3-2b_nr3_122_2_vlorati_sr/events.out.tfevents.1759337476.bask-pg0308u25a.3578336.0 +3 -0
- lisa-ivl3-2b_nr3_122_2_vlorati_sr/events.out.tfevents.1759337801.bask-pg0308u25a.3585526.0 +3 -0
- lisa-ivl3-2b_nr3_122_2_vlorati_sr/runs/Oct01_17-45-37_bask-pg0308u25a/events.out.tfevents.1759337227.bask-pg0308u25a.3571287.1 +3 -0
- lisa-ivl3-2b_nr3_122_2_vlorati_sr/runs/Oct01_17-51-12_bask-pg0308u25a/events.out.tfevents.1759337560.bask-pg0308u25a.3578336.1 +3 -0
- lisa-ivl3-2b_nr3_122_2_vlorati_sr/runs/Oct01_17-56-37_bask-pg0308u25a/events.out.tfevents.1759337887.bask-pg0308u25a.3585526.1 +3 -0
- lisa-ivl3-2b_nr3_122_2_vlorati_sr/runs/Oct01_17-56-37_bask-pg0308u25a/events.out.tfevents.1759363414.bask-pg0308u25a.3585526.2 +3 -0
- lisa-ivl3-2b_nr3_122_vlorati_sr/ckpt_model/config.json +143 -0
- lisa-ivl3-2b_nr3_122_vlorati_sr/ckpt_model/model.safetensors +3 -0
- lisa-ivl3-2b_nr3_122_vlorati_sr/ckpt_model/training_args.bin +3 -0
- lisa-ivl3-2b_nr3_122_vlorati_sr/evaluation_metrics.json +116 -0
- lisa-ivl3-2b_nr3_122_vlorati_sr/events.out.tfevents.1759309189.bask-pg0308u29a.2492715.0 +3 -0
- lisa-ivl3-2b_nr3_122_vlorati_sr/events.out.tfevents.1759309262.bask-pg0308u29a.2496177.0 +3 -0
lisa-ivl2-2b_aati_sr/ckpt_model/config.json
ADDED
|
@@ -0,0 +1,87 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"InternVL3Self"
|
| 4 |
+
],
|
| 5 |
+
"auto_map": {
|
| 6 |
+
"AutoConfig": "configuration_internvl_chat.InternVLChatConfig",
|
| 7 |
+
"AutoModel": "modeling_internvl_chat.InternVLChatModel",
|
| 8 |
+
"AutoModelForCausalLM": "modeling_internvl_chat.InternVLChatModel"
|
| 9 |
+
},
|
| 10 |
+
"downsample_ratio": 0.5,
|
| 11 |
+
"dtype": "bfloat16",
|
| 12 |
+
"dynamic_image_size": true,
|
| 13 |
+
"eos_token_id": 2,
|
| 14 |
+
"force_image_size": 448,
|
| 15 |
+
"llm_config": {
|
| 16 |
+
"_name_or_path": "internlm/internlm2-chat-1_8b",
|
| 17 |
+
"architectures": [
|
| 18 |
+
"InternLM2ForCausalLM"
|
| 19 |
+
],
|
| 20 |
+
"attn_implementation": "eager",
|
| 21 |
+
"auto_map": {
|
| 22 |
+
"AutoConfig": "configuration_internlm2.InternLM2Config",
|
| 23 |
+
"AutoModel": "modeling_internlm2.InternLM2ForCausalLM",
|
| 24 |
+
"AutoModelForCausalLM": "modeling_internlm2.InternLM2ForCausalLM"
|
| 25 |
+
},
|
| 26 |
+
"bias": false,
|
| 27 |
+
"dtype": "bfloat16",
|
| 28 |
+
"hidden_act": "silu",
|
| 29 |
+
"hidden_size": 2048,
|
| 30 |
+
"initializer_range": 0.02,
|
| 31 |
+
"intermediate_size": 8192,
|
| 32 |
+
"max_position_embeddings": 32768,
|
| 33 |
+
"model_type": "internlm2",
|
| 34 |
+
"num_attention_heads": 16,
|
| 35 |
+
"num_hidden_layers": 24,
|
| 36 |
+
"num_key_value_heads": 8,
|
| 37 |
+
"pad_token_id": 2,
|
| 38 |
+
"rms_norm_eps": 1e-05,
|
| 39 |
+
"rope_scaling": {
|
| 40 |
+
"factor": 2.0,
|
| 41 |
+
"type": "dynamic"
|
| 42 |
+
},
|
| 43 |
+
"rope_theta": 1000000,
|
| 44 |
+
"use_bfloat16": true,
|
| 45 |
+
"use_cache": true,
|
| 46 |
+
"vocab_size": 92555
|
| 47 |
+
},
|
| 48 |
+
"max_dynamic_patch": 12,
|
| 49 |
+
"min_dynamic_patch": 1,
|
| 50 |
+
"model_type": "internvl_chat",
|
| 51 |
+
"output_attentions": false,
|
| 52 |
+
"pad_token_id": 2,
|
| 53 |
+
"ps_version": "v2",
|
| 54 |
+
"select_layer": -1,
|
| 55 |
+
"template": "internlm2-chat",
|
| 56 |
+
"tie_word_embeddings": false,
|
| 57 |
+
"transformers_version": null,
|
| 58 |
+
"use_backbone_lora": 0,
|
| 59 |
+
"use_llm_lora": 0,
|
| 60 |
+
"use_thumbnail": true,
|
| 61 |
+
"vision_config": {
|
| 62 |
+
"architectures": [
|
| 63 |
+
"InternVisionModel"
|
| 64 |
+
],
|
| 65 |
+
"attention_dropout": 0.0,
|
| 66 |
+
"drop_path_rate": 0.0,
|
| 67 |
+
"dropout": 0.0,
|
| 68 |
+
"dtype": "bfloat16",
|
| 69 |
+
"hidden_act": "gelu",
|
| 70 |
+
"hidden_size": 1024,
|
| 71 |
+
"image_size": 448,
|
| 72 |
+
"initializer_factor": 1.0,
|
| 73 |
+
"initializer_range": 0.02,
|
| 74 |
+
"intermediate_size": 4096,
|
| 75 |
+
"layer_norm_eps": 1e-06,
|
| 76 |
+
"model_type": "intern_vit_6b",
|
| 77 |
+
"norm_type": "layer_norm",
|
| 78 |
+
"num_attention_heads": 16,
|
| 79 |
+
"num_channels": 3,
|
| 80 |
+
"num_hidden_layers": 24,
|
| 81 |
+
"patch_size": 14,
|
| 82 |
+
"qk_normalization": false,
|
| 83 |
+
"qkv_bias": true,
|
| 84 |
+
"use_bfloat16": true,
|
| 85 |
+
"use_flash_attn": true
|
| 86 |
+
}
|
| 87 |
+
}
|
lisa-ivl2-2b_aati_sr/ckpt_model/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:db228d0f6958ce4561ad6c7b804104e6611700346e0490d1a409bdae9a01ee37
|
| 3 |
+
size 4470345504
|
lisa-ivl2-2b_aati_sr/ckpt_model/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:af5483cf43ae9904b7a17b8cfc6f37988cd9b04cff3dd97add6d7329ce105964
|
| 3 |
+
size 7352
|
lisa-ivl2-2b_aati_sr/evaluation_metrics.json
ADDED
|
@@ -0,0 +1,176 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"val_dataset": "ReasonSeg|val",
|
| 4 |
+
"epoch": 1.0,
|
| 5 |
+
"eval_giou": 0.4299730360507965,
|
| 6 |
+
"eval_ciou": 0.5639147162437439
|
| 7 |
+
},
|
| 8 |
+
{
|
| 9 |
+
"val_dataset": "ReasonSeg|val",
|
| 10 |
+
"epoch": 2.0,
|
| 11 |
+
"eval_giou": 0.4750661551952362,
|
| 12 |
+
"eval_ciou": 0.49450477957725525
|
| 13 |
+
},
|
| 14 |
+
{
|
| 15 |
+
"val_dataset": "ReasonSeg|val",
|
| 16 |
+
"epoch": 3.0,
|
| 17 |
+
"eval_giou": 0.5029056668281555,
|
| 18 |
+
"eval_ciou": 0.5545530319213867
|
| 19 |
+
},
|
| 20 |
+
{
|
| 21 |
+
"val_dataset": "ReasonSeg|val",
|
| 22 |
+
"epoch": 4.0,
|
| 23 |
+
"eval_giou": 0.5038564205169678,
|
| 24 |
+
"eval_ciou": 0.5760622024536133
|
| 25 |
+
},
|
| 26 |
+
{
|
| 27 |
+
"val_dataset": "ReasonSeg|val",
|
| 28 |
+
"epoch": 5.0,
|
| 29 |
+
"eval_giou": 0.4931972026824951,
|
| 30 |
+
"eval_ciou": 0.5593799352645874
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"val_dataset": "ReasonSeg|val",
|
| 34 |
+
"epoch": 6.0,
|
| 35 |
+
"eval_giou": 0.5129929780960083,
|
| 36 |
+
"eval_ciou": 0.5596947073936462
|
| 37 |
+
},
|
| 38 |
+
{
|
| 39 |
+
"val_dataset": "ReasonSeg|val",
|
| 40 |
+
"epoch": 7.0,
|
| 41 |
+
"eval_giou": 0.5289523005485535,
|
| 42 |
+
"eval_ciou": 0.5600836873054504
|
| 43 |
+
},
|
| 44 |
+
{
|
| 45 |
+
"val_dataset": "ReasonSeg|val",
|
| 46 |
+
"epoch": 8.0,
|
| 47 |
+
"eval_giou": 0.5335568785667419,
|
| 48 |
+
"eval_ciou": 0.5150668621063232
|
| 49 |
+
},
|
| 50 |
+
{
|
| 51 |
+
"val_dataset": "ReasonSeg|val",
|
| 52 |
+
"epoch": 9.0,
|
| 53 |
+
"eval_giou": 0.5301417708396912,
|
| 54 |
+
"eval_ciou": 0.6077501773834229
|
| 55 |
+
},
|
| 56 |
+
{
|
| 57 |
+
"val_dataset": "ReasonSeg|val",
|
| 58 |
+
"epoch": 10.0,
|
| 59 |
+
"eval_giou": 0.5494945049285889,
|
| 60 |
+
"eval_ciou": 0.5914241075515747
|
| 61 |
+
},
|
| 62 |
+
{
|
| 63 |
+
"val_dataset": "ReasonSeg|val",
|
| 64 |
+
"epoch": 11.0,
|
| 65 |
+
"eval_giou": 0.5493737459182739,
|
| 66 |
+
"eval_ciou": 0.6159847974777222
|
| 67 |
+
},
|
| 68 |
+
{
|
| 69 |
+
"val_dataset": "ReasonSeg|val",
|
| 70 |
+
"epoch": 12.0,
|
| 71 |
+
"eval_giou": 0.5445547699928284,
|
| 72 |
+
"eval_ciou": 0.5939457416534424
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"val_dataset": "ReasonSeg|val",
|
| 76 |
+
"epoch": 13.0,
|
| 77 |
+
"eval_giou": 0.5355852842330933,
|
| 78 |
+
"eval_ciou": 0.5724180936813354
|
| 79 |
+
},
|
| 80 |
+
{
|
| 81 |
+
"val_dataset": "ReasonSeg|val",
|
| 82 |
+
"epoch": 14.0,
|
| 83 |
+
"eval_giou": 0.5353021621704102,
|
| 84 |
+
"eval_ciou": 0.5821977257728577
|
| 85 |
+
},
|
| 86 |
+
{
|
| 87 |
+
"val_dataset": "ReasonSeg|val",
|
| 88 |
+
"epoch": 15.0,
|
| 89 |
+
"eval_giou": 0.5353786945343018,
|
| 90 |
+
"eval_ciou": 0.5801292061805725
|
| 91 |
+
},
|
| 92 |
+
{
|
| 93 |
+
"val_dataset": "ReasonSeg|val",
|
| 94 |
+
"epoch": 16.0,
|
| 95 |
+
"eval_giou": 0.5340729355812073,
|
| 96 |
+
"eval_ciou": 0.577383816242218
|
| 97 |
+
},
|
| 98 |
+
{
|
| 99 |
+
"val_dataset": "ReasonSeg|val",
|
| 100 |
+
"epoch": 17.0,
|
| 101 |
+
"eval_giou": 0.5309929251670837,
|
| 102 |
+
"eval_ciou": 0.5779208540916443
|
| 103 |
+
},
|
| 104 |
+
{
|
| 105 |
+
"val_dataset": "ReasonSeg|val",
|
| 106 |
+
"epoch": 18.0,
|
| 107 |
+
"eval_giou": 0.5346851944923401,
|
| 108 |
+
"eval_ciou": 0.5808385014533997
|
| 109 |
+
},
|
| 110 |
+
{
|
| 111 |
+
"val_dataset": "ReasonSeg|val",
|
| 112 |
+
"epoch": 19.0,
|
| 113 |
+
"eval_giou": 0.5436327457427979,
|
| 114 |
+
"eval_ciou": 0.6044915318489075
|
| 115 |
+
},
|
| 116 |
+
{
|
| 117 |
+
"val_dataset": "ReasonSeg|val",
|
| 118 |
+
"epoch": 20.0,
|
| 119 |
+
"eval_giou": 0.5427136421203613,
|
| 120 |
+
"eval_ciou": 0.5983464121818542
|
| 121 |
+
},
|
| 122 |
+
{
|
| 123 |
+
"val_dataset": "ReasonSeg|test",
|
| 124 |
+
"epoch": 20.0,
|
| 125 |
+
"eval_giou": 0.534633219242096,
|
| 126 |
+
"eval_ciou": 0.5675567984580994
|
| 127 |
+
},
|
| 128 |
+
{
|
| 129 |
+
"val_dataset": "refcoco|unc|val",
|
| 130 |
+
"epoch": 20.0,
|
| 131 |
+
"eval_giou": 0.7851775288581848,
|
| 132 |
+
"eval_ciou": 0.7884453535079956
|
| 133 |
+
},
|
| 134 |
+
{
|
| 135 |
+
"val_dataset": "refcoco|unc|testA",
|
| 136 |
+
"epoch": 20.0,
|
| 137 |
+
"eval_giou": 0.8090593218803406,
|
| 138 |
+
"eval_ciou": 0.8159176111221313
|
| 139 |
+
},
|
| 140 |
+
{
|
| 141 |
+
"val_dataset": "refcoco|unc|testB",
|
| 142 |
+
"epoch": 20.0,
|
| 143 |
+
"eval_giou": 0.7596240043640137,
|
| 144 |
+
"eval_ciou": 0.7600759863853455
|
| 145 |
+
},
|
| 146 |
+
{
|
| 147 |
+
"val_dataset": "refcoco+|unc|val",
|
| 148 |
+
"epoch": 20.0,
|
| 149 |
+
"eval_giou": 0.7349771857261658,
|
| 150 |
+
"eval_ciou": 0.7298452258110046
|
| 151 |
+
},
|
| 152 |
+
{
|
| 153 |
+
"val_dataset": "refcoco+|unc|testA",
|
| 154 |
+
"epoch": 20.0,
|
| 155 |
+
"eval_giou": 0.7746085524559021,
|
| 156 |
+
"eval_ciou": 0.7730916142463684
|
| 157 |
+
},
|
| 158 |
+
{
|
| 159 |
+
"val_dataset": "refcoco+|unc|testB",
|
| 160 |
+
"epoch": 20.0,
|
| 161 |
+
"eval_giou": 0.6849038004875183,
|
| 162 |
+
"eval_ciou": 0.6688467264175415
|
| 163 |
+
},
|
| 164 |
+
{
|
| 165 |
+
"val_dataset": "refcocog|umd|test",
|
| 166 |
+
"epoch": 20.0,
|
| 167 |
+
"eval_giou": 0.7478209733963013,
|
| 168 |
+
"eval_ciou": 0.7591495513916016
|
| 169 |
+
},
|
| 170 |
+
{
|
| 171 |
+
"val_dataset": "refcocog|umd|val",
|
| 172 |
+
"epoch": 20.0,
|
| 173 |
+
"eval_giou": 0.7427729368209839,
|
| 174 |
+
"eval_ciou": 0.7514315247535706
|
| 175 |
+
}
|
| 176 |
+
]
|
lisa-ivl2-2b_aati_sr/events.out.tfevents.1759176485.bask-pg0308u25a.324870.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6ff85d72042556023246781cc0c4c66d6f8d335407401c64599dac3ce77e8fe1
|
| 3 |
+
size 88
|
lisa-ivl2-2b_aati_sr/events.out.tfevents.1759176613.bask-pg0308u25a.335258.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a321f870b68b8ec8959825f0bbdcc8fb471750bfc88c5d4aa6bbe242a19fcb1b
|
| 3 |
+
size 419822
|
lisa-ivl2-2b_aati_sr/runs/Sep29_21-10-10_bask-pg0308u25a/events.out.tfevents.1759176696.bask-pg0308u25a.335258.1
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:76214c162c6cd38819fd854b3631bd4d684e2e40a92bb87ddcbd7dd42ba28bff
|
| 3 |
+
size 221855
|
lisa-ivl2-2b_aati_sr/runs/Sep29_21-10-10_bask-pg0308u25a/events.out.tfevents.1759226831.bask-pg0308u25a.335258.2
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8496bdaa0d7e4565e1a1a41424515e0446d9afa01b648d87e80bb325a7286963
|
| 3 |
+
size 1402
|
lisa-ivl3-2b_aati_sr/ckpt_model/config.json
ADDED
|
@@ -0,0 +1,143 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"InternVL3Self"
|
| 4 |
+
],
|
| 5 |
+
"auto_map": {
|
| 6 |
+
"AutoConfig": "configuration_internvl_chat.InternVLChatConfig",
|
| 7 |
+
"AutoModel": "modeling_internvl_chat.InternVLChatModel",
|
| 8 |
+
"AutoModelForCausalLM": "modeling_internvl_chat.InternVLChatModel"
|
| 9 |
+
},
|
| 10 |
+
"downsample_ratio": 0.5,
|
| 11 |
+
"dtype": "bfloat16",
|
| 12 |
+
"dynamic_image_size": true,
|
| 13 |
+
"eos_token_id": 151645,
|
| 14 |
+
"force_image_size": 448,
|
| 15 |
+
"hidden_size": 1536,
|
| 16 |
+
"image_fold": null,
|
| 17 |
+
"llm_config": {
|
| 18 |
+
"_attn_implementation_autoset": true,
|
| 19 |
+
"_name_or_path": "./pretrained/Qwen2.5-32B-Instruct",
|
| 20 |
+
"architectures": [
|
| 21 |
+
"Qwen2ForCausalLM"
|
| 22 |
+
],
|
| 23 |
+
"attention_dropout": 0.0,
|
| 24 |
+
"bos_token_id": 151643,
|
| 25 |
+
"dtype": "bfloat16",
|
| 26 |
+
"eos_token_id": 151643,
|
| 27 |
+
"hidden_act": "silu",
|
| 28 |
+
"hidden_size": 1536,
|
| 29 |
+
"initializer_range": 0.02,
|
| 30 |
+
"intermediate_size": 8960,
|
| 31 |
+
"layer_types": [
|
| 32 |
+
"full_attention",
|
| 33 |
+
"full_attention",
|
| 34 |
+
"full_attention",
|
| 35 |
+
"full_attention",
|
| 36 |
+
"full_attention",
|
| 37 |
+
"full_attention",
|
| 38 |
+
"full_attention",
|
| 39 |
+
"full_attention",
|
| 40 |
+
"full_attention",
|
| 41 |
+
"full_attention",
|
| 42 |
+
"full_attention",
|
| 43 |
+
"full_attention",
|
| 44 |
+
"full_attention",
|
| 45 |
+
"full_attention",
|
| 46 |
+
"full_attention",
|
| 47 |
+
"full_attention",
|
| 48 |
+
"full_attention",
|
| 49 |
+
"full_attention",
|
| 50 |
+
"full_attention",
|
| 51 |
+
"full_attention",
|
| 52 |
+
"full_attention",
|
| 53 |
+
"full_attention",
|
| 54 |
+
"full_attention",
|
| 55 |
+
"full_attention",
|
| 56 |
+
"full_attention",
|
| 57 |
+
"full_attention",
|
| 58 |
+
"full_attention",
|
| 59 |
+
"full_attention"
|
| 60 |
+
],
|
| 61 |
+
"max_position_embeddings": 32768,
|
| 62 |
+
"max_window_layers": 70,
|
| 63 |
+
"model_type": "qwen2",
|
| 64 |
+
"moe_config": null,
|
| 65 |
+
"num_attention_heads": 12,
|
| 66 |
+
"num_hidden_layers": 28,
|
| 67 |
+
"num_key_value_heads": 2,
|
| 68 |
+
"rms_norm_eps": 1e-06,
|
| 69 |
+
"rope_scaling": {
|
| 70 |
+
"factor": 2.0,
|
| 71 |
+
"rope_type": "dynamic",
|
| 72 |
+
"type": "dynamic"
|
| 73 |
+
},
|
| 74 |
+
"rope_theta": 1000000.0,
|
| 75 |
+
"sliding_window": null,
|
| 76 |
+
"use_bfloat16": true,
|
| 77 |
+
"use_cache": false,
|
| 78 |
+
"use_sliding_window": false,
|
| 79 |
+
"vocab_size": 151676
|
| 80 |
+
},
|
| 81 |
+
"max_dynamic_patch": 12,
|
| 82 |
+
"min_dynamic_patch": 1,
|
| 83 |
+
"model_type": "internvl_chat",
|
| 84 |
+
"output_attentions": false,
|
| 85 |
+
"pad2square": false,
|
| 86 |
+
"pad_token_id": 151643,
|
| 87 |
+
"ps_version": "v2",
|
| 88 |
+
"select_layer": -1,
|
| 89 |
+
"system_message": null,
|
| 90 |
+
"template": "internvl2_5",
|
| 91 |
+
"tie_word_embeddings": false,
|
| 92 |
+
"transformers_version": null,
|
| 93 |
+
"use_backbone_lora": 0,
|
| 94 |
+
"use_llm_lora": 0,
|
| 95 |
+
"use_thumbnail": true,
|
| 96 |
+
"vision_config": {
|
| 97 |
+
"_attn_implementation_autoset": true,
|
| 98 |
+
"_name_or_path": "OpenGVLab/InternViT-6B-448px-V1-5",
|
| 99 |
+
"architectures": [
|
| 100 |
+
"InternVisionModel"
|
| 101 |
+
],
|
| 102 |
+
"attention_dropout": 0.0,
|
| 103 |
+
"auto_map": {
|
| 104 |
+
"AutoConfig": "configuration_intern_vit.InternVisionConfig",
|
| 105 |
+
"AutoModel": "modeling_intern_vit.InternVisionModel"
|
| 106 |
+
},
|
| 107 |
+
"capacity_factor": 1.2,
|
| 108 |
+
"drop_path_rate": 0.1,
|
| 109 |
+
"dropout": 0.0,
|
| 110 |
+
"dtype": "bfloat16",
|
| 111 |
+
"eval_capacity_factor": 1.4,
|
| 112 |
+
"hidden_act": "gelu",
|
| 113 |
+
"hidden_size": 1024,
|
| 114 |
+
"image_size": 448,
|
| 115 |
+
"initializer_factor": 0.1,
|
| 116 |
+
"initializer_range": 1e-10,
|
| 117 |
+
"intermediate_size": 4096,
|
| 118 |
+
"laux_allreduce": "all_nodes",
|
| 119 |
+
"layer_norm_eps": 1e-06,
|
| 120 |
+
"model_type": "intern_vit_6b",
|
| 121 |
+
"moe_coeff_ratio": 0.5,
|
| 122 |
+
"moe_intermediate_size": 768,
|
| 123 |
+
"moe_output_scale": 4.0,
|
| 124 |
+
"noisy_gate_policy": "RSample_before",
|
| 125 |
+
"norm_type": "layer_norm",
|
| 126 |
+
"num_attention_heads": 16,
|
| 127 |
+
"num_channels": 3,
|
| 128 |
+
"num_experts": 8,
|
| 129 |
+
"num_hidden_layers": 24,
|
| 130 |
+
"num_routed_experts": 4,
|
| 131 |
+
"num_shared_experts": 4,
|
| 132 |
+
"patch_size": 14,
|
| 133 |
+
"qk_normalization": false,
|
| 134 |
+
"qkv_bias": true,
|
| 135 |
+
"shared_expert_intermediate_size": 3072,
|
| 136 |
+
"use_bfloat16": true,
|
| 137 |
+
"use_flash_attn": true,
|
| 138 |
+
"use_moe": false,
|
| 139 |
+
"use_residual": true,
|
| 140 |
+
"use_rts": false,
|
| 141 |
+
"use_weighted_residual": false
|
| 142 |
+
}
|
| 143 |
+
}
|
lisa-ivl3-2b_aati_sr/ckpt_model/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7b8756ec9fa65a596154e58c2c8ffeeade5400bcf4af3f3807b310dc211a352f
|
| 3 |
+
size 4211070232
|
lisa-ivl3-2b_aati_sr/ckpt_model/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a32ca3f97667c68df088ddf7ee12d0dc27112557a472d11076fc1e7cb4fada87
|
| 3 |
+
size 7352
|
lisa-ivl3-2b_aati_sr/evaluation_metrics.json
ADDED
|
@@ -0,0 +1,182 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"val_dataset": "ReasonSeg|val",
|
| 4 |
+
"epoch": 1.0,
|
| 5 |
+
"eval_giou": 0.5756632685661316,
|
| 6 |
+
"eval_ciou": 0.6737087965011597
|
| 7 |
+
},
|
| 8 |
+
{
|
| 9 |
+
"val_dataset": "ReasonSeg|val",
|
| 10 |
+
"epoch": 2.0,
|
| 11 |
+
"eval_giou": 0.5750120282173157,
|
| 12 |
+
"eval_ciou": 0.6774965524673462
|
| 13 |
+
},
|
| 14 |
+
{
|
| 15 |
+
"val_dataset": "ReasonSeg|val",
|
| 16 |
+
"epoch": 3.0,
|
| 17 |
+
"eval_giou": 0.5997360348701477,
|
| 18 |
+
"eval_ciou": 0.6924350261688232
|
| 19 |
+
},
|
| 20 |
+
{
|
| 21 |
+
"val_dataset": "ReasonSeg|val",
|
| 22 |
+
"epoch": 3.0,
|
| 23 |
+
"eval_giou": 0.5967223048210144,
|
| 24 |
+
"eval_ciou": 0.6778789162635803
|
| 25 |
+
},
|
| 26 |
+
{
|
| 27 |
+
"val_dataset": "ReasonSeg|val",
|
| 28 |
+
"epoch": 4.0,
|
| 29 |
+
"eval_giou": 0.5993068218231201,
|
| 30 |
+
"eval_ciou": 0.6605137586593628
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"val_dataset": "ReasonSeg|val",
|
| 34 |
+
"epoch": 5.0,
|
| 35 |
+
"eval_giou": 0.5851569175720215,
|
| 36 |
+
"eval_ciou": 0.6708498597145081
|
| 37 |
+
},
|
| 38 |
+
{
|
| 39 |
+
"val_dataset": "ReasonSeg|val",
|
| 40 |
+
"epoch": 6.0,
|
| 41 |
+
"eval_giou": 0.5863112211227417,
|
| 42 |
+
"eval_ciou": 0.691616415977478
|
| 43 |
+
},
|
| 44 |
+
{
|
| 45 |
+
"val_dataset": "ReasonSeg|val",
|
| 46 |
+
"epoch": 7.0,
|
| 47 |
+
"eval_giou": 0.5981602668762207,
|
| 48 |
+
"eval_ciou": 0.6373696327209473
|
| 49 |
+
},
|
| 50 |
+
{
|
| 51 |
+
"val_dataset": "ReasonSeg|val",
|
| 52 |
+
"epoch": 8.0,
|
| 53 |
+
"eval_giou": 0.5868176221847534,
|
| 54 |
+
"eval_ciou": 0.6186509728431702
|
| 55 |
+
},
|
| 56 |
+
{
|
| 57 |
+
"val_dataset": "ReasonSeg|val",
|
| 58 |
+
"epoch": 9.0,
|
| 59 |
+
"eval_giou": 0.599888026714325,
|
| 60 |
+
"eval_ciou": 0.6218949556350708
|
| 61 |
+
},
|
| 62 |
+
{
|
| 63 |
+
"val_dataset": "ReasonSeg|val",
|
| 64 |
+
"epoch": 10.0,
|
| 65 |
+
"eval_giou": 0.5966016054153442,
|
| 66 |
+
"eval_ciou": 0.6475383639335632
|
| 67 |
+
},
|
| 68 |
+
{
|
| 69 |
+
"val_dataset": "ReasonSeg|val",
|
| 70 |
+
"epoch": 11.0,
|
| 71 |
+
"eval_giou": 0.599604070186615,
|
| 72 |
+
"eval_ciou": 0.6404339075088501
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"val_dataset": "ReasonSeg|val",
|
| 76 |
+
"epoch": 12.0,
|
| 77 |
+
"eval_giou": 0.6057789325714111,
|
| 78 |
+
"eval_ciou": 0.6498401165008545
|
| 79 |
+
},
|
| 80 |
+
{
|
| 81 |
+
"val_dataset": "ReasonSeg|val",
|
| 82 |
+
"epoch": 13.0,
|
| 83 |
+
"eval_giou": 0.6040271520614624,
|
| 84 |
+
"eval_ciou": 0.6116575002670288
|
| 85 |
+
},
|
| 86 |
+
{
|
| 87 |
+
"val_dataset": "ReasonSeg|val",
|
| 88 |
+
"epoch": 14.0,
|
| 89 |
+
"eval_giou": 0.60584956407547,
|
| 90 |
+
"eval_ciou": 0.6290444135665894
|
| 91 |
+
},
|
| 92 |
+
{
|
| 93 |
+
"val_dataset": "ReasonSeg|val",
|
| 94 |
+
"epoch": 15.0,
|
| 95 |
+
"eval_giou": 0.6188424229621887,
|
| 96 |
+
"eval_ciou": 0.6753682494163513
|
| 97 |
+
},
|
| 98 |
+
{
|
| 99 |
+
"val_dataset": "ReasonSeg|val",
|
| 100 |
+
"epoch": 16.0,
|
| 101 |
+
"eval_giou": 0.6140751242637634,
|
| 102 |
+
"eval_ciou": 0.658414900302887
|
| 103 |
+
},
|
| 104 |
+
{
|
| 105 |
+
"val_dataset": "ReasonSeg|val",
|
| 106 |
+
"epoch": 17.0,
|
| 107 |
+
"eval_giou": 0.6123270988464355,
|
| 108 |
+
"eval_ciou": 0.6641766428947449
|
| 109 |
+
},
|
| 110 |
+
{
|
| 111 |
+
"val_dataset": "ReasonSeg|val",
|
| 112 |
+
"epoch": 18.0,
|
| 113 |
+
"eval_giou": 0.619356095790863,
|
| 114 |
+
"eval_ciou": 0.6501895785331726
|
| 115 |
+
},
|
| 116 |
+
{
|
| 117 |
+
"val_dataset": "ReasonSeg|val",
|
| 118 |
+
"epoch": 19.0,
|
| 119 |
+
"eval_giou": 0.6187554597854614,
|
| 120 |
+
"eval_ciou": 0.6563228964805603
|
| 121 |
+
},
|
| 122 |
+
{
|
| 123 |
+
"val_dataset": "ReasonSeg|val",
|
| 124 |
+
"epoch": 20.0,
|
| 125 |
+
"eval_giou": 0.6174865961074829,
|
| 126 |
+
"eval_ciou": 0.6626467108726501
|
| 127 |
+
},
|
| 128 |
+
{
|
| 129 |
+
"val_dataset": "ReasonSeg|test",
|
| 130 |
+
"epoch": 20.0,
|
| 131 |
+
"eval_giou": 0.6232897043228149,
|
| 132 |
+
"eval_ciou": 0.6276214718818665
|
| 133 |
+
},
|
| 134 |
+
{
|
| 135 |
+
"val_dataset": "refcoco|unc|val",
|
| 136 |
+
"epoch": 20.0,
|
| 137 |
+
"eval_giou": 0.8062067627906799,
|
| 138 |
+
"eval_ciou": 0.810879111289978
|
| 139 |
+
},
|
| 140 |
+
{
|
| 141 |
+
"val_dataset": "refcoco|unc|testA",
|
| 142 |
+
"epoch": 20.0,
|
| 143 |
+
"eval_giou": 0.8242553472518921,
|
| 144 |
+
"eval_ciou": 0.8299362659454346
|
| 145 |
+
},
|
| 146 |
+
{
|
| 147 |
+
"val_dataset": "refcoco|unc|testB",
|
| 148 |
+
"epoch": 20.0,
|
| 149 |
+
"eval_giou": 0.7838391661643982,
|
| 150 |
+
"eval_ciou": 0.786938488483429
|
| 151 |
+
},
|
| 152 |
+
{
|
| 153 |
+
"val_dataset": "refcoco+|unc|val",
|
| 154 |
+
"epoch": 20.0,
|
| 155 |
+
"eval_giou": 0.7634923458099365,
|
| 156 |
+
"eval_ciou": 0.7587481737136841
|
| 157 |
+
},
|
| 158 |
+
{
|
| 159 |
+
"val_dataset": "refcoco+|unc|testA",
|
| 160 |
+
"epoch": 20.0,
|
| 161 |
+
"eval_giou": 0.8028817176818848,
|
| 162 |
+
"eval_ciou": 0.8037988543510437
|
| 163 |
+
},
|
| 164 |
+
{
|
| 165 |
+
"val_dataset": "refcoco+|unc|testB",
|
| 166 |
+
"epoch": 20.0,
|
| 167 |
+
"eval_giou": 0.7256011366844177,
|
| 168 |
+
"eval_ciou": 0.7161976099014282
|
| 169 |
+
},
|
| 170 |
+
{
|
| 171 |
+
"val_dataset": "refcocog|umd|test",
|
| 172 |
+
"epoch": 20.0,
|
| 173 |
+
"eval_giou": 0.7690672278404236,
|
| 174 |
+
"eval_ciou": 0.7782297134399414
|
| 175 |
+
},
|
| 176 |
+
{
|
| 177 |
+
"val_dataset": "refcocog|umd|val",
|
| 178 |
+
"epoch": 20.0,
|
| 179 |
+
"eval_giou": 0.7630250453948975,
|
| 180 |
+
"eval_ciou": 0.7679163217544556
|
| 181 |
+
}
|
| 182 |
+
]
|
lisa-ivl3-2b_aati_sr/events.out.tfevents.1759004643.bask-pg0308u25a.2921884.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e6060d551386abeeef58b5d85a5781e838437a3f4f15aae1bb56735d9d87c995
|
| 3 |
+
size 62245
|
lisa-ivl3-2b_aati_sr/events.out.tfevents.1759097339.bask-pg0308u25a.2052782.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ff02795f8e9f35bda1236632a6fd2dfb546ccfaf86f99081ac0ce7795a3ab1e6
|
| 3 |
+
size 88
|
lisa-ivl3-2b_aati_sr/events.out.tfevents.1759097469.bask-pg0308u25a.2060658.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dde771411e16dac8b9ec7ecbdd1d1d5df1d90e4abc4928f2147f896093f43774
|
| 3 |
+
size 378412
|
lisa-ivl3-2b_aati_sr/runs/Sep27_21-24-01_bask-pg0308u25a/events.out.tfevents.1759004710.bask-pg0308u25a.2921884.1
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b8b24ce4e45eabeb0c6fc91a2b8bc9dcede990e16d18b17e8744d98c220d93e0
|
| 3 |
+
size 41157
|
lisa-ivl3-2b_aati_sr/runs/Sep28_23-08-55_bask-pg0308u25a/events.out.tfevents.1759097412.bask-pg0308u25a.2052782.1
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9a2802bbb94769223c74bd2a3406fa8febce2e7f72e7795feea4c08b55409e34
|
| 3 |
+
size 9116
|
lisa-ivl3-2b_aati_sr/runs/Sep28_23-11-05_bask-pg0308u25a/events.out.tfevents.1759097532.bask-pg0308u25a.2060658.1
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:236a74e7a10f5744921f0f493a18f2ed79694ab8ee3f9f0567da35a7ae00fe7a
|
| 3 |
+
size 201997
|
lisa-ivl3-2b_aati_sr/runs/Sep28_23-11-05_bask-pg0308u25a/events.out.tfevents.1759137768.bask-pg0308u25a.2060658.2
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8507b72c7c21b75d4e0b67b9ba7029d67536808b9ddc221aff9db173755b683c
|
| 3 |
+
size 1402
|
lisa-ivl3-2b_aati_sr_bs5acu8e20/ckpt_model/config.json
ADDED
|
@@ -0,0 +1,143 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"InternVL3Self"
|
| 4 |
+
],
|
| 5 |
+
"auto_map": {
|
| 6 |
+
"AutoConfig": "configuration_internvl_chat.InternVLChatConfig",
|
| 7 |
+
"AutoModel": "modeling_internvl_chat.InternVLChatModel",
|
| 8 |
+
"AutoModelForCausalLM": "modeling_internvl_chat.InternVLChatModel"
|
| 9 |
+
},
|
| 10 |
+
"downsample_ratio": 0.5,
|
| 11 |
+
"dtype": "bfloat16",
|
| 12 |
+
"dynamic_image_size": true,
|
| 13 |
+
"eos_token_id": 151645,
|
| 14 |
+
"force_image_size": 448,
|
| 15 |
+
"hidden_size": 1536,
|
| 16 |
+
"image_fold": null,
|
| 17 |
+
"llm_config": {
|
| 18 |
+
"_attn_implementation_autoset": true,
|
| 19 |
+
"_name_or_path": "./pretrained/Qwen2.5-32B-Instruct",
|
| 20 |
+
"architectures": [
|
| 21 |
+
"Qwen2ForCausalLM"
|
| 22 |
+
],
|
| 23 |
+
"attention_dropout": 0.0,
|
| 24 |
+
"bos_token_id": 151643,
|
| 25 |
+
"dtype": "bfloat16",
|
| 26 |
+
"eos_token_id": 151643,
|
| 27 |
+
"hidden_act": "silu",
|
| 28 |
+
"hidden_size": 1536,
|
| 29 |
+
"initializer_range": 0.02,
|
| 30 |
+
"intermediate_size": 8960,
|
| 31 |
+
"layer_types": [
|
| 32 |
+
"full_attention",
|
| 33 |
+
"full_attention",
|
| 34 |
+
"full_attention",
|
| 35 |
+
"full_attention",
|
| 36 |
+
"full_attention",
|
| 37 |
+
"full_attention",
|
| 38 |
+
"full_attention",
|
| 39 |
+
"full_attention",
|
| 40 |
+
"full_attention",
|
| 41 |
+
"full_attention",
|
| 42 |
+
"full_attention",
|
| 43 |
+
"full_attention",
|
| 44 |
+
"full_attention",
|
| 45 |
+
"full_attention",
|
| 46 |
+
"full_attention",
|
| 47 |
+
"full_attention",
|
| 48 |
+
"full_attention",
|
| 49 |
+
"full_attention",
|
| 50 |
+
"full_attention",
|
| 51 |
+
"full_attention",
|
| 52 |
+
"full_attention",
|
| 53 |
+
"full_attention",
|
| 54 |
+
"full_attention",
|
| 55 |
+
"full_attention",
|
| 56 |
+
"full_attention",
|
| 57 |
+
"full_attention",
|
| 58 |
+
"full_attention",
|
| 59 |
+
"full_attention"
|
| 60 |
+
],
|
| 61 |
+
"max_position_embeddings": 32768,
|
| 62 |
+
"max_window_layers": 70,
|
| 63 |
+
"model_type": "qwen2",
|
| 64 |
+
"moe_config": null,
|
| 65 |
+
"num_attention_heads": 12,
|
| 66 |
+
"num_hidden_layers": 28,
|
| 67 |
+
"num_key_value_heads": 2,
|
| 68 |
+
"rms_norm_eps": 1e-06,
|
| 69 |
+
"rope_scaling": {
|
| 70 |
+
"factor": 2.0,
|
| 71 |
+
"rope_type": "dynamic",
|
| 72 |
+
"type": "dynamic"
|
| 73 |
+
},
|
| 74 |
+
"rope_theta": 1000000.0,
|
| 75 |
+
"sliding_window": null,
|
| 76 |
+
"use_bfloat16": true,
|
| 77 |
+
"use_cache": false,
|
| 78 |
+
"use_sliding_window": false,
|
| 79 |
+
"vocab_size": 151676
|
| 80 |
+
},
|
| 81 |
+
"max_dynamic_patch": 12,
|
| 82 |
+
"min_dynamic_patch": 1,
|
| 83 |
+
"model_type": "internvl_chat",
|
| 84 |
+
"output_attentions": false,
|
| 85 |
+
"pad2square": false,
|
| 86 |
+
"pad_token_id": 151643,
|
| 87 |
+
"ps_version": "v2",
|
| 88 |
+
"select_layer": -1,
|
| 89 |
+
"system_message": null,
|
| 90 |
+
"template": "internvl2_5",
|
| 91 |
+
"tie_word_embeddings": false,
|
| 92 |
+
"transformers_version": null,
|
| 93 |
+
"use_backbone_lora": 0,
|
| 94 |
+
"use_llm_lora": 0,
|
| 95 |
+
"use_thumbnail": true,
|
| 96 |
+
"vision_config": {
|
| 97 |
+
"_attn_implementation_autoset": true,
|
| 98 |
+
"_name_or_path": "OpenGVLab/InternViT-6B-448px-V1-5",
|
| 99 |
+
"architectures": [
|
| 100 |
+
"InternVisionModel"
|
| 101 |
+
],
|
| 102 |
+
"attention_dropout": 0.0,
|
| 103 |
+
"auto_map": {
|
| 104 |
+
"AutoConfig": "configuration_intern_vit.InternVisionConfig",
|
| 105 |
+
"AutoModel": "modeling_intern_vit.InternVisionModel"
|
| 106 |
+
},
|
| 107 |
+
"capacity_factor": 1.2,
|
| 108 |
+
"drop_path_rate": 0.1,
|
| 109 |
+
"dropout": 0.0,
|
| 110 |
+
"dtype": "bfloat16",
|
| 111 |
+
"eval_capacity_factor": 1.4,
|
| 112 |
+
"hidden_act": "gelu",
|
| 113 |
+
"hidden_size": 1024,
|
| 114 |
+
"image_size": 448,
|
| 115 |
+
"initializer_factor": 0.1,
|
| 116 |
+
"initializer_range": 1e-10,
|
| 117 |
+
"intermediate_size": 4096,
|
| 118 |
+
"laux_allreduce": "all_nodes",
|
| 119 |
+
"layer_norm_eps": 1e-06,
|
| 120 |
+
"model_type": "intern_vit_6b",
|
| 121 |
+
"moe_coeff_ratio": 0.5,
|
| 122 |
+
"moe_intermediate_size": 768,
|
| 123 |
+
"moe_output_scale": 4.0,
|
| 124 |
+
"noisy_gate_policy": "RSample_before",
|
| 125 |
+
"norm_type": "layer_norm",
|
| 126 |
+
"num_attention_heads": 16,
|
| 127 |
+
"num_channels": 3,
|
| 128 |
+
"num_experts": 8,
|
| 129 |
+
"num_hidden_layers": 24,
|
| 130 |
+
"num_routed_experts": 4,
|
| 131 |
+
"num_shared_experts": 4,
|
| 132 |
+
"patch_size": 14,
|
| 133 |
+
"qk_normalization": false,
|
| 134 |
+
"qkv_bias": true,
|
| 135 |
+
"shared_expert_intermediate_size": 3072,
|
| 136 |
+
"use_bfloat16": true,
|
| 137 |
+
"use_flash_attn": true,
|
| 138 |
+
"use_moe": false,
|
| 139 |
+
"use_residual": true,
|
| 140 |
+
"use_rts": false,
|
| 141 |
+
"use_weighted_residual": false
|
| 142 |
+
}
|
| 143 |
+
}
|
lisa-ivl3-2b_aati_sr_bs5acu8e20/ckpt_model/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d38203e4873ce0d8abfd8a0527229991c202472124463253ad5706c15bfcca80
|
| 3 |
+
size 4211070232
|
lisa-ivl3-2b_aati_sr_bs5acu8e20/ckpt_model/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2a5858fec803a4d4d42cb4f8643322589d07cf0bbba9e7d456d7b801bbdca6c7
|
| 3 |
+
size 7352
|
lisa-ivl3-2b_aati_sr_bs5acu8e20/evaluation_metrics.json
ADDED
|
@@ -0,0 +1,176 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"val_dataset": "ReasonSeg|val",
|
| 4 |
+
"epoch": 1.0,
|
| 5 |
+
"eval_giou": 0.5831701755523682,
|
| 6 |
+
"eval_ciou": 0.6626128554344177
|
| 7 |
+
},
|
| 8 |
+
{
|
| 9 |
+
"val_dataset": "ReasonSeg|val",
|
| 10 |
+
"epoch": 2.0,
|
| 11 |
+
"eval_giou": 0.5952411890029907,
|
| 12 |
+
"eval_ciou": 0.6626877188682556
|
| 13 |
+
},
|
| 14 |
+
{
|
| 15 |
+
"val_dataset": "ReasonSeg|val",
|
| 16 |
+
"epoch": 3.0,
|
| 17 |
+
"eval_giou": 0.6062884330749512,
|
| 18 |
+
"eval_ciou": 0.6667135953903198
|
| 19 |
+
},
|
| 20 |
+
{
|
| 21 |
+
"val_dataset": "ReasonSeg|val",
|
| 22 |
+
"epoch": 4.0,
|
| 23 |
+
"eval_giou": 0.6196147799491882,
|
| 24 |
+
"eval_ciou": 0.6437596678733826
|
| 25 |
+
},
|
| 26 |
+
{
|
| 27 |
+
"val_dataset": "ReasonSeg|val",
|
| 28 |
+
"epoch": 5.0,
|
| 29 |
+
"eval_giou": 0.6216316223144531,
|
| 30 |
+
"eval_ciou": 0.6733407974243164
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"val_dataset": "ReasonSeg|val",
|
| 34 |
+
"epoch": 6.0,
|
| 35 |
+
"eval_giou": 0.6207277774810791,
|
| 36 |
+
"eval_ciou": 0.7004563808441162
|
| 37 |
+
},
|
| 38 |
+
{
|
| 39 |
+
"val_dataset": "ReasonSeg|val",
|
| 40 |
+
"epoch": 7.0,
|
| 41 |
+
"eval_giou": 0.6256881952285767,
|
| 42 |
+
"eval_ciou": 0.681186318397522
|
| 43 |
+
},
|
| 44 |
+
{
|
| 45 |
+
"val_dataset": "ReasonSeg|val",
|
| 46 |
+
"epoch": 8.0,
|
| 47 |
+
"eval_giou": 0.6138451099395752,
|
| 48 |
+
"eval_ciou": 0.6687238812446594
|
| 49 |
+
},
|
| 50 |
+
{
|
| 51 |
+
"val_dataset": "ReasonSeg|val",
|
| 52 |
+
"epoch": 9.0,
|
| 53 |
+
"eval_giou": 0.6257895231246948,
|
| 54 |
+
"eval_ciou": 0.6808822154998779
|
| 55 |
+
},
|
| 56 |
+
{
|
| 57 |
+
"val_dataset": "ReasonSeg|val",
|
| 58 |
+
"epoch": 10.0,
|
| 59 |
+
"eval_giou": 0.6295360326766968,
|
| 60 |
+
"eval_ciou": 0.6649029850959778
|
| 61 |
+
},
|
| 62 |
+
{
|
| 63 |
+
"val_dataset": "ReasonSeg|val",
|
| 64 |
+
"epoch": 11.0,
|
| 65 |
+
"eval_giou": 0.6255075931549072,
|
| 66 |
+
"eval_ciou": 0.672091543674469
|
| 67 |
+
},
|
| 68 |
+
{
|
| 69 |
+
"val_dataset": "ReasonSeg|val",
|
| 70 |
+
"epoch": 12.0,
|
| 71 |
+
"eval_giou": 0.6274581551551819,
|
| 72 |
+
"eval_ciou": 0.6651784181594849
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"val_dataset": "ReasonSeg|val",
|
| 76 |
+
"epoch": 13.0,
|
| 77 |
+
"eval_giou": 0.6306081414222717,
|
| 78 |
+
"eval_ciou": 0.6813814640045166
|
| 79 |
+
},
|
| 80 |
+
{
|
| 81 |
+
"val_dataset": "ReasonSeg|val",
|
| 82 |
+
"epoch": 14.0,
|
| 83 |
+
"eval_giou": 0.6242629289627075,
|
| 84 |
+
"eval_ciou": 0.6436342000961304
|
| 85 |
+
},
|
| 86 |
+
{
|
| 87 |
+
"val_dataset": "ReasonSeg|val",
|
| 88 |
+
"epoch": 15.0,
|
| 89 |
+
"eval_giou": 0.636084794998169,
|
| 90 |
+
"eval_ciou": 0.6750655770301819
|
| 91 |
+
},
|
| 92 |
+
{
|
| 93 |
+
"val_dataset": "ReasonSeg|val",
|
| 94 |
+
"epoch": 16.0,
|
| 95 |
+
"eval_giou": 0.6268562078475952,
|
| 96 |
+
"eval_ciou": 0.6949459314346313
|
| 97 |
+
},
|
| 98 |
+
{
|
| 99 |
+
"val_dataset": "ReasonSeg|val",
|
| 100 |
+
"epoch": 17.0,
|
| 101 |
+
"eval_giou": 0.6331221461296082,
|
| 102 |
+
"eval_ciou": 0.6858609914779663
|
| 103 |
+
},
|
| 104 |
+
{
|
| 105 |
+
"val_dataset": "ReasonSeg|val",
|
| 106 |
+
"epoch": 18.0,
|
| 107 |
+
"eval_giou": 0.6354182958602905,
|
| 108 |
+
"eval_ciou": 0.6718393564224243
|
| 109 |
+
},
|
| 110 |
+
{
|
| 111 |
+
"val_dataset": "ReasonSeg|val",
|
| 112 |
+
"epoch": 19.0,
|
| 113 |
+
"eval_giou": 0.6372456550598145,
|
| 114 |
+
"eval_ciou": 0.6829223036766052
|
| 115 |
+
},
|
| 116 |
+
{
|
| 117 |
+
"val_dataset": "ReasonSeg|val",
|
| 118 |
+
"epoch": 20.0,
|
| 119 |
+
"eval_giou": 0.635535717010498,
|
| 120 |
+
"eval_ciou": 0.6737943291664124
|
| 121 |
+
},
|
| 122 |
+
{
|
| 123 |
+
"val_dataset": "ReasonSeg|test",
|
| 124 |
+
"epoch": 20.0,
|
| 125 |
+
"eval_giou": 0.6199853420257568,
|
| 126 |
+
"eval_ciou": 0.6205106973648071
|
| 127 |
+
},
|
| 128 |
+
{
|
| 129 |
+
"val_dataset": "refcoco|unc|val",
|
| 130 |
+
"epoch": 20.0,
|
| 131 |
+
"eval_giou": 0.8225948214530945,
|
| 132 |
+
"eval_ciou": 0.8271152377128601
|
| 133 |
+
},
|
| 134 |
+
{
|
| 135 |
+
"val_dataset": "refcoco|unc|testA",
|
| 136 |
+
"epoch": 20.0,
|
| 137 |
+
"eval_giou": 0.8378753066062927,
|
| 138 |
+
"eval_ciou": 0.843865156173706
|
| 139 |
+
},
|
| 140 |
+
{
|
| 141 |
+
"val_dataset": "refcoco|unc|testB",
|
| 142 |
+
"epoch": 20.0,
|
| 143 |
+
"eval_giou": 0.8056657910346985,
|
| 144 |
+
"eval_ciou": 0.8094556331634521
|
| 145 |
+
},
|
| 146 |
+
{
|
| 147 |
+
"val_dataset": "refcoco+|unc|val",
|
| 148 |
+
"epoch": 20.0,
|
| 149 |
+
"eval_giou": 0.7836799025535583,
|
| 150 |
+
"eval_ciou": 0.7731773853302002
|
| 151 |
+
},
|
| 152 |
+
{
|
| 153 |
+
"val_dataset": "refcoco+|unc|testA",
|
| 154 |
+
"epoch": 20.0,
|
| 155 |
+
"eval_giou": 0.8187907338142395,
|
| 156 |
+
"eval_ciou": 0.8201593160629272
|
| 157 |
+
},
|
| 158 |
+
{
|
| 159 |
+
"val_dataset": "refcoco+|unc|testB",
|
| 160 |
+
"epoch": 20.0,
|
| 161 |
+
"eval_giou": 0.7530681490898132,
|
| 162 |
+
"eval_ciou": 0.741623044013977
|
| 163 |
+
},
|
| 164 |
+
{
|
| 165 |
+
"val_dataset": "refcocog|umd|test",
|
| 166 |
+
"epoch": 20.0,
|
| 167 |
+
"eval_giou": 0.7875200510025024,
|
| 168 |
+
"eval_ciou": 0.7955977320671082
|
| 169 |
+
},
|
| 170 |
+
{
|
| 171 |
+
"val_dataset": "refcocog|umd|val",
|
| 172 |
+
"epoch": 20.0,
|
| 173 |
+
"eval_giou": 0.7873671650886536,
|
| 174 |
+
"eval_ciou": 0.7938516736030579
|
| 175 |
+
}
|
| 176 |
+
]
|
lisa-ivl3-2b_aati_sr_bs5acu8e20/events.out.tfevents.1759318805.bask-pg0309u15a.1769939.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:67c2af395273417556689a5d08107ef7aa0428fa56a3865c50dd72904187edc3
|
| 3 |
+
size 419822
|
lisa-ivl3-2b_aati_sr_bs5acu8e20/runs/Oct01_12-39-59_bask-pg0309u15a/events.out.tfevents.1759318879.bask-pg0309u15a.1769939.1
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0fce42c53d94518d177027c274c37c2cda048ea86b389c783cfaab5bbca82f90
|
| 3 |
+
size 223363
|
lisa-ivl3-2b_aati_sr_bs5acu8e20/runs/Oct01_12-39-59_bask-pg0309u15a/events.out.tfevents.1759401820.bask-pg0309u15a.1769939.2
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:181b60d216f4848288f94bcee5fe5a3911b349dbdb683b7bff42595701bca3c8
|
| 3 |
+
size 1402
|
lisa-ivl3-2b_nr2_vlorati_sr/ckpt_model/config.json
ADDED
|
@@ -0,0 +1,143 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"InternVL3Self"
|
| 4 |
+
],
|
| 5 |
+
"auto_map": {
|
| 6 |
+
"AutoConfig": "configuration_internvl_chat.InternVLChatConfig",
|
| 7 |
+
"AutoModel": "modeling_internvl_chat.InternVLChatModel",
|
| 8 |
+
"AutoModelForCausalLM": "modeling_internvl_chat.InternVLChatModel"
|
| 9 |
+
},
|
| 10 |
+
"downsample_ratio": 0.5,
|
| 11 |
+
"dtype": "bfloat16",
|
| 12 |
+
"dynamic_image_size": true,
|
| 13 |
+
"eos_token_id": 151645,
|
| 14 |
+
"force_image_size": 448,
|
| 15 |
+
"hidden_size": 1536,
|
| 16 |
+
"image_fold": null,
|
| 17 |
+
"llm_config": {
|
| 18 |
+
"_attn_implementation_autoset": true,
|
| 19 |
+
"_name_or_path": "./pretrained/Qwen2.5-32B-Instruct",
|
| 20 |
+
"architectures": [
|
| 21 |
+
"Qwen2ForCausalLM"
|
| 22 |
+
],
|
| 23 |
+
"attention_dropout": 0.0,
|
| 24 |
+
"bos_token_id": 151643,
|
| 25 |
+
"dtype": "bfloat16",
|
| 26 |
+
"eos_token_id": 151643,
|
| 27 |
+
"hidden_act": "silu",
|
| 28 |
+
"hidden_size": 1536,
|
| 29 |
+
"initializer_range": 0.02,
|
| 30 |
+
"intermediate_size": 8960,
|
| 31 |
+
"layer_types": [
|
| 32 |
+
"full_attention",
|
| 33 |
+
"full_attention",
|
| 34 |
+
"full_attention",
|
| 35 |
+
"full_attention",
|
| 36 |
+
"full_attention",
|
| 37 |
+
"full_attention",
|
| 38 |
+
"full_attention",
|
| 39 |
+
"full_attention",
|
| 40 |
+
"full_attention",
|
| 41 |
+
"full_attention",
|
| 42 |
+
"full_attention",
|
| 43 |
+
"full_attention",
|
| 44 |
+
"full_attention",
|
| 45 |
+
"full_attention",
|
| 46 |
+
"full_attention",
|
| 47 |
+
"full_attention",
|
| 48 |
+
"full_attention",
|
| 49 |
+
"full_attention",
|
| 50 |
+
"full_attention",
|
| 51 |
+
"full_attention",
|
| 52 |
+
"full_attention",
|
| 53 |
+
"full_attention",
|
| 54 |
+
"full_attention",
|
| 55 |
+
"full_attention",
|
| 56 |
+
"full_attention",
|
| 57 |
+
"full_attention",
|
| 58 |
+
"full_attention",
|
| 59 |
+
"full_attention"
|
| 60 |
+
],
|
| 61 |
+
"max_position_embeddings": 32768,
|
| 62 |
+
"max_window_layers": 70,
|
| 63 |
+
"model_type": "qwen2",
|
| 64 |
+
"moe_config": null,
|
| 65 |
+
"num_attention_heads": 12,
|
| 66 |
+
"num_hidden_layers": 28,
|
| 67 |
+
"num_key_value_heads": 2,
|
| 68 |
+
"rms_norm_eps": 1e-06,
|
| 69 |
+
"rope_scaling": {
|
| 70 |
+
"factor": 2.0,
|
| 71 |
+
"rope_type": "dynamic",
|
| 72 |
+
"type": "dynamic"
|
| 73 |
+
},
|
| 74 |
+
"rope_theta": 1000000.0,
|
| 75 |
+
"sliding_window": null,
|
| 76 |
+
"use_bfloat16": true,
|
| 77 |
+
"use_cache": false,
|
| 78 |
+
"use_sliding_window": false,
|
| 79 |
+
"vocab_size": 151676
|
| 80 |
+
},
|
| 81 |
+
"max_dynamic_patch": 12,
|
| 82 |
+
"min_dynamic_patch": 1,
|
| 83 |
+
"model_type": "internvl_chat",
|
| 84 |
+
"output_attentions": false,
|
| 85 |
+
"pad2square": false,
|
| 86 |
+
"pad_token_id": 151643,
|
| 87 |
+
"ps_version": "v2",
|
| 88 |
+
"select_layer": -1,
|
| 89 |
+
"system_message": null,
|
| 90 |
+
"template": "internvl2_5",
|
| 91 |
+
"tie_word_embeddings": false,
|
| 92 |
+
"transformers_version": null,
|
| 93 |
+
"use_backbone_lora": 0,
|
| 94 |
+
"use_llm_lora": 0,
|
| 95 |
+
"use_thumbnail": true,
|
| 96 |
+
"vision_config": {
|
| 97 |
+
"_attn_implementation_autoset": true,
|
| 98 |
+
"_name_or_path": "OpenGVLab/InternViT-6B-448px-V1-5",
|
| 99 |
+
"architectures": [
|
| 100 |
+
"InternVisionModel"
|
| 101 |
+
],
|
| 102 |
+
"attention_dropout": 0.0,
|
| 103 |
+
"auto_map": {
|
| 104 |
+
"AutoConfig": "configuration_intern_vit.InternVisionConfig",
|
| 105 |
+
"AutoModel": "modeling_intern_vit.InternVisionModel"
|
| 106 |
+
},
|
| 107 |
+
"capacity_factor": 1.2,
|
| 108 |
+
"drop_path_rate": 0.1,
|
| 109 |
+
"dropout": 0.0,
|
| 110 |
+
"dtype": "bfloat16",
|
| 111 |
+
"eval_capacity_factor": 1.4,
|
| 112 |
+
"hidden_act": "gelu",
|
| 113 |
+
"hidden_size": 1024,
|
| 114 |
+
"image_size": 448,
|
| 115 |
+
"initializer_factor": 0.1,
|
| 116 |
+
"initializer_range": 1e-10,
|
| 117 |
+
"intermediate_size": 4096,
|
| 118 |
+
"laux_allreduce": "all_nodes",
|
| 119 |
+
"layer_norm_eps": 1e-06,
|
| 120 |
+
"model_type": "intern_vit_6b",
|
| 121 |
+
"moe_coeff_ratio": 0.5,
|
| 122 |
+
"moe_intermediate_size": 768,
|
| 123 |
+
"moe_output_scale": 4.0,
|
| 124 |
+
"noisy_gate_policy": "RSample_before",
|
| 125 |
+
"norm_type": "layer_norm",
|
| 126 |
+
"num_attention_heads": 16,
|
| 127 |
+
"num_channels": 3,
|
| 128 |
+
"num_experts": 8,
|
| 129 |
+
"num_hidden_layers": 24,
|
| 130 |
+
"num_routed_experts": 4,
|
| 131 |
+
"num_shared_experts": 4,
|
| 132 |
+
"patch_size": 14,
|
| 133 |
+
"qk_normalization": false,
|
| 134 |
+
"qkv_bias": true,
|
| 135 |
+
"shared_expert_intermediate_size": 3072,
|
| 136 |
+
"use_bfloat16": true,
|
| 137 |
+
"use_flash_attn": true,
|
| 138 |
+
"use_moe": false,
|
| 139 |
+
"use_residual": true,
|
| 140 |
+
"use_rts": false,
|
| 141 |
+
"use_weighted_residual": false
|
| 142 |
+
}
|
| 143 |
+
}
|
lisa-ivl3-2b_nr2_vlorati_sr/ckpt_model/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:173587561539e9ca8afd26179c60ac88288d2251ffbcdfc852b311549c141bf8
|
| 3 |
+
size 4244119544
|
lisa-ivl3-2b_nr2_vlorati_sr/ckpt_model/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:be8234d7498e224822badd158dcf7a4e5ff9f0b47390f78268aca5df600092ab
|
| 3 |
+
size 7352
|
lisa-ivl3-2b_nr2_vlorati_sr/evaluation_metrics.json
ADDED
|
@@ -0,0 +1,116 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"val_dataset": "ReasonSeg|val",
|
| 4 |
+
"epoch": 1.0,
|
| 5 |
+
"eval_giou": 0.49992606043815613,
|
| 6 |
+
"eval_ciou": 0.6110827922821045
|
| 7 |
+
},
|
| 8 |
+
{
|
| 9 |
+
"val_dataset": "ReasonSeg|val",
|
| 10 |
+
"epoch": 2.0,
|
| 11 |
+
"eval_giou": 0.5632675290107727,
|
| 12 |
+
"eval_ciou": 0.623365581035614
|
| 13 |
+
},
|
| 14 |
+
{
|
| 15 |
+
"val_dataset": "ReasonSeg|val",
|
| 16 |
+
"epoch": 3.0,
|
| 17 |
+
"eval_giou": 0.5778806805610657,
|
| 18 |
+
"eval_ciou": 0.6206309199333191
|
| 19 |
+
},
|
| 20 |
+
{
|
| 21 |
+
"val_dataset": "ReasonSeg|val",
|
| 22 |
+
"epoch": 4.0,
|
| 23 |
+
"eval_giou": 0.5851303935050964,
|
| 24 |
+
"eval_ciou": 0.6020804643630981
|
| 25 |
+
},
|
| 26 |
+
{
|
| 27 |
+
"val_dataset": "ReasonSeg|val",
|
| 28 |
+
"epoch": 5.0,
|
| 29 |
+
"eval_giou": 0.5820455551147461,
|
| 30 |
+
"eval_ciou": 0.6651975512504578
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"val_dataset": "ReasonSeg|val",
|
| 34 |
+
"epoch": 6.0,
|
| 35 |
+
"eval_giou": 0.6028497219085693,
|
| 36 |
+
"eval_ciou": 0.6962510943412781
|
| 37 |
+
},
|
| 38 |
+
{
|
| 39 |
+
"val_dataset": "ReasonSeg|val",
|
| 40 |
+
"epoch": 7.0,
|
| 41 |
+
"eval_giou": 0.5977049469947815,
|
| 42 |
+
"eval_ciou": 0.6804401874542236
|
| 43 |
+
},
|
| 44 |
+
{
|
| 45 |
+
"val_dataset": "ReasonSeg|val",
|
| 46 |
+
"epoch": 8.0,
|
| 47 |
+
"eval_giou": 0.6007415652275085,
|
| 48 |
+
"eval_ciou": 0.6796437501907349
|
| 49 |
+
},
|
| 50 |
+
{
|
| 51 |
+
"val_dataset": "ReasonSeg|val",
|
| 52 |
+
"epoch": 9.0,
|
| 53 |
+
"eval_giou": 0.611798882484436,
|
| 54 |
+
"eval_ciou": 0.6809463500976562
|
| 55 |
+
},
|
| 56 |
+
{
|
| 57 |
+
"val_dataset": "ReasonSeg|val",
|
| 58 |
+
"epoch": 10.0,
|
| 59 |
+
"eval_giou": 0.6134523153305054,
|
| 60 |
+
"eval_ciou": 0.6816759705543518
|
| 61 |
+
},
|
| 62 |
+
{
|
| 63 |
+
"val_dataset": "ReasonSeg|test",
|
| 64 |
+
"epoch": 10.0,
|
| 65 |
+
"eval_giou": 0.6059213876724243,
|
| 66 |
+
"eval_ciou": 0.6428766846656799
|
| 67 |
+
},
|
| 68 |
+
{
|
| 69 |
+
"val_dataset": "refcoco|unc|val",
|
| 70 |
+
"epoch": 10.0,
|
| 71 |
+
"eval_giou": 0.7799202799797058,
|
| 72 |
+
"eval_ciou": 0.786268413066864
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"val_dataset": "refcoco|unc|testA",
|
| 76 |
+
"epoch": 10.0,
|
| 77 |
+
"eval_giou": 0.8031054139137268,
|
| 78 |
+
"eval_ciou": 0.8086256980895996
|
| 79 |
+
},
|
| 80 |
+
{
|
| 81 |
+
"val_dataset": "refcoco|unc|testB",
|
| 82 |
+
"epoch": 10.0,
|
| 83 |
+
"eval_giou": 0.7538403272628784,
|
| 84 |
+
"eval_ciou": 0.7585700750350952
|
| 85 |
+
},
|
| 86 |
+
{
|
| 87 |
+
"val_dataset": "refcoco+|unc|val",
|
| 88 |
+
"epoch": 10.0,
|
| 89 |
+
"eval_giou": 0.7317773699760437,
|
| 90 |
+
"eval_ciou": 0.7287389039993286
|
| 91 |
+
},
|
| 92 |
+
{
|
| 93 |
+
"val_dataset": "refcoco+|unc|testA",
|
| 94 |
+
"epoch": 10.0,
|
| 95 |
+
"eval_giou": 0.7756525278091431,
|
| 96 |
+
"eval_ciou": 0.7790922522544861
|
| 97 |
+
},
|
| 98 |
+
{
|
| 99 |
+
"val_dataset": "refcoco+|unc|testB",
|
| 100 |
+
"epoch": 10.0,
|
| 101 |
+
"eval_giou": 0.6839732527732849,
|
| 102 |
+
"eval_ciou": 0.6755383610725403
|
| 103 |
+
},
|
| 104 |
+
{
|
| 105 |
+
"val_dataset": "refcocog|umd|test",
|
| 106 |
+
"epoch": 10.0,
|
| 107 |
+
"eval_giou": 0.7441026568412781,
|
| 108 |
+
"eval_ciou": 0.7565898895263672
|
| 109 |
+
},
|
| 110 |
+
{
|
| 111 |
+
"val_dataset": "refcocog|umd|val",
|
| 112 |
+
"epoch": 10.0,
|
| 113 |
+
"eval_giou": 0.7429221272468567,
|
| 114 |
+
"eval_ciou": 0.7514289021492004
|
| 115 |
+
}
|
| 116 |
+
]
|
lisa-ivl3-2b_nr2_vlorati_sr/events.out.tfevents.1759275794.bask-pg0309u06a.3124946.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a626d6013165f7d36b0d694d7cb70efc26aa7293b04f2b4033c9f6aaa49e6c89
|
| 3 |
+
size 212352
|
lisa-ivl3-2b_nr2_vlorati_sr/runs/Oct01_00-43-11_bask-pg0309u06a/events.out.tfevents.1759275884.bask-pg0309u06a.3124946.1
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fca84167c42051785d90a2d5cfe045d729b673057867500e41f40bee333d9175
|
| 3 |
+
size 116399
|
lisa-ivl3-2b_nr2_vlorati_sr/runs/Oct01_00-43-11_bask-pg0309u06a/events.out.tfevents.1759322723.bask-pg0309u06a.3124946.2
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:28be3c60b606cd71d8db4d463b62c2df3c4bd394ae7c4e8ac23f0f56f371b33a
|
| 3 |
+
size 1402
|
lisa-ivl3-2b_nr3_122_2_vlorati_sr/ckpt_model/config.json
ADDED
|
@@ -0,0 +1,143 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"InternVL3Self"
|
| 4 |
+
],
|
| 5 |
+
"auto_map": {
|
| 6 |
+
"AutoConfig": "configuration_internvl_chat.InternVLChatConfig",
|
| 7 |
+
"AutoModel": "modeling_internvl_chat.InternVLChatModel",
|
| 8 |
+
"AutoModelForCausalLM": "modeling_internvl_chat.InternVLChatModel"
|
| 9 |
+
},
|
| 10 |
+
"downsample_ratio": 0.5,
|
| 11 |
+
"dtype": "bfloat16",
|
| 12 |
+
"dynamic_image_size": true,
|
| 13 |
+
"eos_token_id": 151645,
|
| 14 |
+
"force_image_size": 448,
|
| 15 |
+
"hidden_size": 1536,
|
| 16 |
+
"image_fold": null,
|
| 17 |
+
"llm_config": {
|
| 18 |
+
"_attn_implementation_autoset": true,
|
| 19 |
+
"_name_or_path": "./pretrained/Qwen2.5-32B-Instruct",
|
| 20 |
+
"architectures": [
|
| 21 |
+
"Qwen2ForCausalLM"
|
| 22 |
+
],
|
| 23 |
+
"attention_dropout": 0.0,
|
| 24 |
+
"bos_token_id": 151643,
|
| 25 |
+
"dtype": "bfloat16",
|
| 26 |
+
"eos_token_id": 151643,
|
| 27 |
+
"hidden_act": "silu",
|
| 28 |
+
"hidden_size": 1536,
|
| 29 |
+
"initializer_range": 0.02,
|
| 30 |
+
"intermediate_size": 8960,
|
| 31 |
+
"layer_types": [
|
| 32 |
+
"full_attention",
|
| 33 |
+
"full_attention",
|
| 34 |
+
"full_attention",
|
| 35 |
+
"full_attention",
|
| 36 |
+
"full_attention",
|
| 37 |
+
"full_attention",
|
| 38 |
+
"full_attention",
|
| 39 |
+
"full_attention",
|
| 40 |
+
"full_attention",
|
| 41 |
+
"full_attention",
|
| 42 |
+
"full_attention",
|
| 43 |
+
"full_attention",
|
| 44 |
+
"full_attention",
|
| 45 |
+
"full_attention",
|
| 46 |
+
"full_attention",
|
| 47 |
+
"full_attention",
|
| 48 |
+
"full_attention",
|
| 49 |
+
"full_attention",
|
| 50 |
+
"full_attention",
|
| 51 |
+
"full_attention",
|
| 52 |
+
"full_attention",
|
| 53 |
+
"full_attention",
|
| 54 |
+
"full_attention",
|
| 55 |
+
"full_attention",
|
| 56 |
+
"full_attention",
|
| 57 |
+
"full_attention",
|
| 58 |
+
"full_attention",
|
| 59 |
+
"full_attention"
|
| 60 |
+
],
|
| 61 |
+
"max_position_embeddings": 32768,
|
| 62 |
+
"max_window_layers": 70,
|
| 63 |
+
"model_type": "qwen2",
|
| 64 |
+
"moe_config": null,
|
| 65 |
+
"num_attention_heads": 12,
|
| 66 |
+
"num_hidden_layers": 28,
|
| 67 |
+
"num_key_value_heads": 2,
|
| 68 |
+
"rms_norm_eps": 1e-06,
|
| 69 |
+
"rope_scaling": {
|
| 70 |
+
"factor": 2.0,
|
| 71 |
+
"rope_type": "dynamic",
|
| 72 |
+
"type": "dynamic"
|
| 73 |
+
},
|
| 74 |
+
"rope_theta": 1000000.0,
|
| 75 |
+
"sliding_window": null,
|
| 76 |
+
"use_bfloat16": true,
|
| 77 |
+
"use_cache": false,
|
| 78 |
+
"use_sliding_window": false,
|
| 79 |
+
"vocab_size": 151676
|
| 80 |
+
},
|
| 81 |
+
"max_dynamic_patch": 12,
|
| 82 |
+
"min_dynamic_patch": 1,
|
| 83 |
+
"model_type": "internvl_chat",
|
| 84 |
+
"output_attentions": false,
|
| 85 |
+
"pad2square": false,
|
| 86 |
+
"pad_token_id": 151643,
|
| 87 |
+
"ps_version": "v2",
|
| 88 |
+
"select_layer": -1,
|
| 89 |
+
"system_message": null,
|
| 90 |
+
"template": "internvl2_5",
|
| 91 |
+
"tie_word_embeddings": false,
|
| 92 |
+
"transformers_version": null,
|
| 93 |
+
"use_backbone_lora": 0,
|
| 94 |
+
"use_llm_lora": 0,
|
| 95 |
+
"use_thumbnail": true,
|
| 96 |
+
"vision_config": {
|
| 97 |
+
"_attn_implementation_autoset": true,
|
| 98 |
+
"_name_or_path": "OpenGVLab/InternViT-6B-448px-V1-5",
|
| 99 |
+
"architectures": [
|
| 100 |
+
"InternVisionModel"
|
| 101 |
+
],
|
| 102 |
+
"attention_dropout": 0.0,
|
| 103 |
+
"auto_map": {
|
| 104 |
+
"AutoConfig": "configuration_intern_vit.InternVisionConfig",
|
| 105 |
+
"AutoModel": "modeling_intern_vit.InternVisionModel"
|
| 106 |
+
},
|
| 107 |
+
"capacity_factor": 1.2,
|
| 108 |
+
"drop_path_rate": 0.1,
|
| 109 |
+
"dropout": 0.0,
|
| 110 |
+
"dtype": "bfloat16",
|
| 111 |
+
"eval_capacity_factor": 1.4,
|
| 112 |
+
"hidden_act": "gelu",
|
| 113 |
+
"hidden_size": 1024,
|
| 114 |
+
"image_size": 448,
|
| 115 |
+
"initializer_factor": 0.1,
|
| 116 |
+
"initializer_range": 1e-10,
|
| 117 |
+
"intermediate_size": 4096,
|
| 118 |
+
"laux_allreduce": "all_nodes",
|
| 119 |
+
"layer_norm_eps": 1e-06,
|
| 120 |
+
"model_type": "intern_vit_6b",
|
| 121 |
+
"moe_coeff_ratio": 0.5,
|
| 122 |
+
"moe_intermediate_size": 768,
|
| 123 |
+
"moe_output_scale": 4.0,
|
| 124 |
+
"noisy_gate_policy": "RSample_before",
|
| 125 |
+
"norm_type": "layer_norm",
|
| 126 |
+
"num_attention_heads": 16,
|
| 127 |
+
"num_channels": 3,
|
| 128 |
+
"num_experts": 8,
|
| 129 |
+
"num_hidden_layers": 24,
|
| 130 |
+
"num_routed_experts": 4,
|
| 131 |
+
"num_shared_experts": 4,
|
| 132 |
+
"patch_size": 14,
|
| 133 |
+
"qk_normalization": false,
|
| 134 |
+
"qkv_bias": true,
|
| 135 |
+
"shared_expert_intermediate_size": 3072,
|
| 136 |
+
"use_bfloat16": true,
|
| 137 |
+
"use_flash_attn": true,
|
| 138 |
+
"use_moe": false,
|
| 139 |
+
"use_residual": true,
|
| 140 |
+
"use_rts": false,
|
| 141 |
+
"use_weighted_residual": false
|
| 142 |
+
}
|
| 143 |
+
}
|
lisa-ivl3-2b_nr3_122_2_vlorati_sr/ckpt_model/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9f4a808fc1338f523d39870fc6f62d3d0c3f65a38db9f6aba9ff19db3dbb7f81
|
| 3 |
+
size 4244119544
|
lisa-ivl3-2b_nr3_122_2_vlorati_sr/ckpt_model/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:379c086e43080e14b752c22cfffeb2e07a07c0bd6ea16efb629f93d6cf26012c
|
| 3 |
+
size 7352
|
lisa-ivl3-2b_nr3_122_2_vlorati_sr/evaluation_metrics.json
ADDED
|
@@ -0,0 +1,116 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"val_dataset": "ReasonSeg|val",
|
| 4 |
+
"epoch": 1.0,
|
| 5 |
+
"eval_giou": 0.529840350151062,
|
| 6 |
+
"eval_ciou": 0.61083984375
|
| 7 |
+
},
|
| 8 |
+
{
|
| 9 |
+
"val_dataset": "ReasonSeg|val",
|
| 10 |
+
"epoch": 2.0,
|
| 11 |
+
"eval_giou": 0.5495224595069885,
|
| 12 |
+
"eval_ciou": 0.6280785202980042
|
| 13 |
+
},
|
| 14 |
+
{
|
| 15 |
+
"val_dataset": "ReasonSeg|val",
|
| 16 |
+
"epoch": 3.0,
|
| 17 |
+
"eval_giou": 0.5777504444122314,
|
| 18 |
+
"eval_ciou": 0.6374984979629517
|
| 19 |
+
},
|
| 20 |
+
{
|
| 21 |
+
"val_dataset": "ReasonSeg|val",
|
| 22 |
+
"epoch": 4.0,
|
| 23 |
+
"eval_giou": 0.577897846698761,
|
| 24 |
+
"eval_ciou": 0.6562594771385193
|
| 25 |
+
},
|
| 26 |
+
{
|
| 27 |
+
"val_dataset": "ReasonSeg|val",
|
| 28 |
+
"epoch": 5.0,
|
| 29 |
+
"eval_giou": 0.5785743594169617,
|
| 30 |
+
"eval_ciou": 0.6527135968208313
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"val_dataset": "ReasonSeg|val",
|
| 34 |
+
"epoch": 6.0,
|
| 35 |
+
"eval_giou": 0.5933905243873596,
|
| 36 |
+
"eval_ciou": 0.6383258700370789
|
| 37 |
+
},
|
| 38 |
+
{
|
| 39 |
+
"val_dataset": "ReasonSeg|val",
|
| 40 |
+
"epoch": 7.0,
|
| 41 |
+
"eval_giou": 0.6087335348129272,
|
| 42 |
+
"eval_ciou": 0.6717571020126343
|
| 43 |
+
},
|
| 44 |
+
{
|
| 45 |
+
"val_dataset": "ReasonSeg|val",
|
| 46 |
+
"epoch": 8.0,
|
| 47 |
+
"eval_giou": 0.6045055985450745,
|
| 48 |
+
"eval_ciou": 0.6598408818244934
|
| 49 |
+
},
|
| 50 |
+
{
|
| 51 |
+
"val_dataset": "ReasonSeg|val",
|
| 52 |
+
"epoch": 9.0,
|
| 53 |
+
"eval_giou": 0.6039425730705261,
|
| 54 |
+
"eval_ciou": 0.6512514352798462
|
| 55 |
+
},
|
| 56 |
+
{
|
| 57 |
+
"val_dataset": "ReasonSeg|val",
|
| 58 |
+
"epoch": 10.0,
|
| 59 |
+
"eval_giou": 0.6099196076393127,
|
| 60 |
+
"eval_ciou": 0.6599507927894592
|
| 61 |
+
},
|
| 62 |
+
{
|
| 63 |
+
"val_dataset": "ReasonSeg|test",
|
| 64 |
+
"epoch": 10.0,
|
| 65 |
+
"eval_giou": 0.5983391404151917,
|
| 66 |
+
"eval_ciou": 0.6378564238548279
|
| 67 |
+
},
|
| 68 |
+
{
|
| 69 |
+
"val_dataset": "refcoco|unc|val",
|
| 70 |
+
"epoch": 10.0,
|
| 71 |
+
"eval_giou": 0.7846658825874329,
|
| 72 |
+
"eval_ciou": 0.7908703684806824
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"val_dataset": "refcoco|unc|testA",
|
| 76 |
+
"epoch": 10.0,
|
| 77 |
+
"eval_giou": 0.8061053156852722,
|
| 78 |
+
"eval_ciou": 0.8121806979179382
|
| 79 |
+
},
|
| 80 |
+
{
|
| 81 |
+
"val_dataset": "refcoco|unc|testB",
|
| 82 |
+
"epoch": 10.0,
|
| 83 |
+
"eval_giou": 0.7557012438774109,
|
| 84 |
+
"eval_ciou": 0.7612731456756592
|
| 85 |
+
},
|
| 86 |
+
{
|
| 87 |
+
"val_dataset": "refcoco+|unc|val",
|
| 88 |
+
"epoch": 10.0,
|
| 89 |
+
"eval_giou": 0.7339252233505249,
|
| 90 |
+
"eval_ciou": 0.7308076620101929
|
| 91 |
+
},
|
| 92 |
+
{
|
| 93 |
+
"val_dataset": "refcoco+|unc|testA",
|
| 94 |
+
"epoch": 10.0,
|
| 95 |
+
"eval_giou": 0.7778381109237671,
|
| 96 |
+
"eval_ciou": 0.7786577343940735
|
| 97 |
+
},
|
| 98 |
+
{
|
| 99 |
+
"val_dataset": "refcoco+|unc|testB",
|
| 100 |
+
"epoch": 10.0,
|
| 101 |
+
"eval_giou": 0.6824975609779358,
|
| 102 |
+
"eval_ciou": 0.6733501553535461
|
| 103 |
+
},
|
| 104 |
+
{
|
| 105 |
+
"val_dataset": "refcocog|umd|test",
|
| 106 |
+
"epoch": 10.0,
|
| 107 |
+
"eval_giou": 0.7487913370132446,
|
| 108 |
+
"eval_ciou": 0.7593491077423096
|
| 109 |
+
},
|
| 110 |
+
{
|
| 111 |
+
"val_dataset": "refcocog|umd|val",
|
| 112 |
+
"epoch": 10.0,
|
| 113 |
+
"eval_giou": 0.741722047328949,
|
| 114 |
+
"eval_ciou": 0.7474746108055115
|
| 115 |
+
}
|
| 116 |
+
]
|
lisa-ivl3-2b_nr3_122_2_vlorati_sr/events.out.tfevents.1759337142.bask-pg0308u25a.3571287.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4168921ea1ec0605b7c338bf6d503272031b323ecd4d82df1611cc32a60dffff
|
| 3 |
+
size 88
|
lisa-ivl3-2b_nr3_122_2_vlorati_sr/events.out.tfevents.1759337476.bask-pg0308u25a.3578336.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:eb4061204dccb97c29c5016a06fb6b8106538e983d4706a9d8cfaf792ab96b22
|
| 3 |
+
size 88
|
lisa-ivl3-2b_nr3_122_2_vlorati_sr/events.out.tfevents.1759337801.bask-pg0308u25a.3585526.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c17b438e249a33aa0523b19d071b841150d521f4866f6a455593a32537716925
|
| 3 |
+
size 212352
|
lisa-ivl3-2b_nr3_122_2_vlorati_sr/runs/Oct01_17-45-37_bask-pg0308u25a/events.out.tfevents.1759337227.bask-pg0308u25a.3571287.1
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:04d1081dc5085eec9aa9a354881fc3242aef9ef28c256ec99da1afaf8dbc23db
|
| 3 |
+
size 9142
|
lisa-ivl3-2b_nr3_122_2_vlorati_sr/runs/Oct01_17-51-12_bask-pg0308u25a/events.out.tfevents.1759337560.bask-pg0308u25a.3578336.1
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9b8b67e67b8dea507d2af6b314c185d004a4f6812d77e6466ac9798e3dad32b3
|
| 3 |
+
size 9142
|
lisa-ivl3-2b_nr3_122_2_vlorati_sr/runs/Oct01_17-56-37_bask-pg0308u25a/events.out.tfevents.1759337887.bask-pg0308u25a.3585526.1
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:97e63030a506caa5bfdc317829ff84b1ddab6909836eaafeac7140ef1a7d1553
|
| 3 |
+
size 116408
|
lisa-ivl3-2b_nr3_122_2_vlorati_sr/runs/Oct01_17-56-37_bask-pg0308u25a/events.out.tfevents.1759363414.bask-pg0308u25a.3585526.2
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:39734374fddcd4e3ccd2b2229fe29d71a729ae3759c1b098cb295dc411c47503
|
| 3 |
+
size 1402
|
lisa-ivl3-2b_nr3_122_vlorati_sr/ckpt_model/config.json
ADDED
|
@@ -0,0 +1,143 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"InternVL3Self"
|
| 4 |
+
],
|
| 5 |
+
"auto_map": {
|
| 6 |
+
"AutoConfig": "configuration_internvl_chat.InternVLChatConfig",
|
| 7 |
+
"AutoModel": "modeling_internvl_chat.InternVLChatModel",
|
| 8 |
+
"AutoModelForCausalLM": "modeling_internvl_chat.InternVLChatModel"
|
| 9 |
+
},
|
| 10 |
+
"downsample_ratio": 0.5,
|
| 11 |
+
"dtype": "bfloat16",
|
| 12 |
+
"dynamic_image_size": true,
|
| 13 |
+
"eos_token_id": 151645,
|
| 14 |
+
"force_image_size": 448,
|
| 15 |
+
"hidden_size": 1536,
|
| 16 |
+
"image_fold": null,
|
| 17 |
+
"llm_config": {
|
| 18 |
+
"_attn_implementation_autoset": true,
|
| 19 |
+
"_name_or_path": "./pretrained/Qwen2.5-32B-Instruct",
|
| 20 |
+
"architectures": [
|
| 21 |
+
"Qwen2ForCausalLM"
|
| 22 |
+
],
|
| 23 |
+
"attention_dropout": 0.0,
|
| 24 |
+
"bos_token_id": 151643,
|
| 25 |
+
"dtype": "bfloat16",
|
| 26 |
+
"eos_token_id": 151643,
|
| 27 |
+
"hidden_act": "silu",
|
| 28 |
+
"hidden_size": 1536,
|
| 29 |
+
"initializer_range": 0.02,
|
| 30 |
+
"intermediate_size": 8960,
|
| 31 |
+
"layer_types": [
|
| 32 |
+
"full_attention",
|
| 33 |
+
"full_attention",
|
| 34 |
+
"full_attention",
|
| 35 |
+
"full_attention",
|
| 36 |
+
"full_attention",
|
| 37 |
+
"full_attention",
|
| 38 |
+
"full_attention",
|
| 39 |
+
"full_attention",
|
| 40 |
+
"full_attention",
|
| 41 |
+
"full_attention",
|
| 42 |
+
"full_attention",
|
| 43 |
+
"full_attention",
|
| 44 |
+
"full_attention",
|
| 45 |
+
"full_attention",
|
| 46 |
+
"full_attention",
|
| 47 |
+
"full_attention",
|
| 48 |
+
"full_attention",
|
| 49 |
+
"full_attention",
|
| 50 |
+
"full_attention",
|
| 51 |
+
"full_attention",
|
| 52 |
+
"full_attention",
|
| 53 |
+
"full_attention",
|
| 54 |
+
"full_attention",
|
| 55 |
+
"full_attention",
|
| 56 |
+
"full_attention",
|
| 57 |
+
"full_attention",
|
| 58 |
+
"full_attention",
|
| 59 |
+
"full_attention"
|
| 60 |
+
],
|
| 61 |
+
"max_position_embeddings": 32768,
|
| 62 |
+
"max_window_layers": 70,
|
| 63 |
+
"model_type": "qwen2",
|
| 64 |
+
"moe_config": null,
|
| 65 |
+
"num_attention_heads": 12,
|
| 66 |
+
"num_hidden_layers": 28,
|
| 67 |
+
"num_key_value_heads": 2,
|
| 68 |
+
"rms_norm_eps": 1e-06,
|
| 69 |
+
"rope_scaling": {
|
| 70 |
+
"factor": 2.0,
|
| 71 |
+
"rope_type": "dynamic",
|
| 72 |
+
"type": "dynamic"
|
| 73 |
+
},
|
| 74 |
+
"rope_theta": 1000000.0,
|
| 75 |
+
"sliding_window": null,
|
| 76 |
+
"use_bfloat16": true,
|
| 77 |
+
"use_cache": false,
|
| 78 |
+
"use_sliding_window": false,
|
| 79 |
+
"vocab_size": 151676
|
| 80 |
+
},
|
| 81 |
+
"max_dynamic_patch": 12,
|
| 82 |
+
"min_dynamic_patch": 1,
|
| 83 |
+
"model_type": "internvl_chat",
|
| 84 |
+
"output_attentions": false,
|
| 85 |
+
"pad2square": false,
|
| 86 |
+
"pad_token_id": 151643,
|
| 87 |
+
"ps_version": "v2",
|
| 88 |
+
"select_layer": -1,
|
| 89 |
+
"system_message": null,
|
| 90 |
+
"template": "internvl2_5",
|
| 91 |
+
"tie_word_embeddings": false,
|
| 92 |
+
"transformers_version": null,
|
| 93 |
+
"use_backbone_lora": 0,
|
| 94 |
+
"use_llm_lora": 0,
|
| 95 |
+
"use_thumbnail": true,
|
| 96 |
+
"vision_config": {
|
| 97 |
+
"_attn_implementation_autoset": true,
|
| 98 |
+
"_name_or_path": "OpenGVLab/InternViT-6B-448px-V1-5",
|
| 99 |
+
"architectures": [
|
| 100 |
+
"InternVisionModel"
|
| 101 |
+
],
|
| 102 |
+
"attention_dropout": 0.0,
|
| 103 |
+
"auto_map": {
|
| 104 |
+
"AutoConfig": "configuration_intern_vit.InternVisionConfig",
|
| 105 |
+
"AutoModel": "modeling_intern_vit.InternVisionModel"
|
| 106 |
+
},
|
| 107 |
+
"capacity_factor": 1.2,
|
| 108 |
+
"drop_path_rate": 0.1,
|
| 109 |
+
"dropout": 0.0,
|
| 110 |
+
"dtype": "bfloat16",
|
| 111 |
+
"eval_capacity_factor": 1.4,
|
| 112 |
+
"hidden_act": "gelu",
|
| 113 |
+
"hidden_size": 1024,
|
| 114 |
+
"image_size": 448,
|
| 115 |
+
"initializer_factor": 0.1,
|
| 116 |
+
"initializer_range": 1e-10,
|
| 117 |
+
"intermediate_size": 4096,
|
| 118 |
+
"laux_allreduce": "all_nodes",
|
| 119 |
+
"layer_norm_eps": 1e-06,
|
| 120 |
+
"model_type": "intern_vit_6b",
|
| 121 |
+
"moe_coeff_ratio": 0.5,
|
| 122 |
+
"moe_intermediate_size": 768,
|
| 123 |
+
"moe_output_scale": 4.0,
|
| 124 |
+
"noisy_gate_policy": "RSample_before",
|
| 125 |
+
"norm_type": "layer_norm",
|
| 126 |
+
"num_attention_heads": 16,
|
| 127 |
+
"num_channels": 3,
|
| 128 |
+
"num_experts": 8,
|
| 129 |
+
"num_hidden_layers": 24,
|
| 130 |
+
"num_routed_experts": 4,
|
| 131 |
+
"num_shared_experts": 4,
|
| 132 |
+
"patch_size": 14,
|
| 133 |
+
"qk_normalization": false,
|
| 134 |
+
"qkv_bias": true,
|
| 135 |
+
"shared_expert_intermediate_size": 3072,
|
| 136 |
+
"use_bfloat16": true,
|
| 137 |
+
"use_flash_attn": true,
|
| 138 |
+
"use_moe": false,
|
| 139 |
+
"use_residual": true,
|
| 140 |
+
"use_rts": false,
|
| 141 |
+
"use_weighted_residual": false
|
| 142 |
+
}
|
| 143 |
+
}
|
lisa-ivl3-2b_nr3_122_vlorati_sr/ckpt_model/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e765f787a4b4b90a44c541c060bb60a6be32e0be5cf6019395536ef4edefc8fb
|
| 3 |
+
size 4234675816
|
lisa-ivl3-2b_nr3_122_vlorati_sr/ckpt_model/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2f76ac58c2275447d9bcb0a727e23e4a780f4b7215404a84c15dbfdfb1231188
|
| 3 |
+
size 7352
|
lisa-ivl3-2b_nr3_122_vlorati_sr/evaluation_metrics.json
ADDED
|
@@ -0,0 +1,116 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"val_dataset": "ReasonSeg|val",
|
| 4 |
+
"epoch": 1.0,
|
| 5 |
+
"eval_giou": 0.5373343229293823,
|
| 6 |
+
"eval_ciou": 0.6180018782615662
|
| 7 |
+
},
|
| 8 |
+
{
|
| 9 |
+
"val_dataset": "ReasonSeg|val",
|
| 10 |
+
"epoch": 2.0,
|
| 11 |
+
"eval_giou": 0.5649374127388,
|
| 12 |
+
"eval_ciou": 0.636763334274292
|
| 13 |
+
},
|
| 14 |
+
{
|
| 15 |
+
"val_dataset": "ReasonSeg|val",
|
| 16 |
+
"epoch": 3.0,
|
| 17 |
+
"eval_giou": 0.5869829654693604,
|
| 18 |
+
"eval_ciou": 0.7015414834022522
|
| 19 |
+
},
|
| 20 |
+
{
|
| 21 |
+
"val_dataset": "ReasonSeg|val",
|
| 22 |
+
"epoch": 4.0,
|
| 23 |
+
"eval_giou": 0.5917444825172424,
|
| 24 |
+
"eval_ciou": 0.7137655019760132
|
| 25 |
+
},
|
| 26 |
+
{
|
| 27 |
+
"val_dataset": "ReasonSeg|val",
|
| 28 |
+
"epoch": 5.0,
|
| 29 |
+
"eval_giou": 0.5996885895729065,
|
| 30 |
+
"eval_ciou": 0.7088227868080139
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"val_dataset": "ReasonSeg|val",
|
| 34 |
+
"epoch": 6.0,
|
| 35 |
+
"eval_giou": 0.6110551953315735,
|
| 36 |
+
"eval_ciou": 0.6965492963790894
|
| 37 |
+
},
|
| 38 |
+
{
|
| 39 |
+
"val_dataset": "ReasonSeg|val",
|
| 40 |
+
"epoch": 7.0,
|
| 41 |
+
"eval_giou": 0.6078798174858093,
|
| 42 |
+
"eval_ciou": 0.718289852142334
|
| 43 |
+
},
|
| 44 |
+
{
|
| 45 |
+
"val_dataset": "ReasonSeg|val",
|
| 46 |
+
"epoch": 8.0,
|
| 47 |
+
"eval_giou": 0.6149584054946899,
|
| 48 |
+
"eval_ciou": 0.6968558430671692
|
| 49 |
+
},
|
| 50 |
+
{
|
| 51 |
+
"val_dataset": "ReasonSeg|val",
|
| 52 |
+
"epoch": 9.0,
|
| 53 |
+
"eval_giou": 0.6170741319656372,
|
| 54 |
+
"eval_ciou": 0.7212521433830261
|
| 55 |
+
},
|
| 56 |
+
{
|
| 57 |
+
"val_dataset": "ReasonSeg|val",
|
| 58 |
+
"epoch": 10.0,
|
| 59 |
+
"eval_giou": 0.6232219934463501,
|
| 60 |
+
"eval_ciou": 0.7210202217102051
|
| 61 |
+
},
|
| 62 |
+
{
|
| 63 |
+
"val_dataset": "ReasonSeg|test",
|
| 64 |
+
"epoch": 10.0,
|
| 65 |
+
"eval_giou": 0.596360981464386,
|
| 66 |
+
"eval_ciou": 0.6341654062271118
|
| 67 |
+
},
|
| 68 |
+
{
|
| 69 |
+
"val_dataset": "refcoco|unc|val",
|
| 70 |
+
"epoch": 10.0,
|
| 71 |
+
"eval_giou": 0.7841194868087769,
|
| 72 |
+
"eval_ciou": 0.7900864481925964
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"val_dataset": "refcoco|unc|testA",
|
| 76 |
+
"epoch": 10.0,
|
| 77 |
+
"eval_giou": 0.8032773733139038,
|
| 78 |
+
"eval_ciou": 0.8108689188957214
|
| 79 |
+
},
|
| 80 |
+
{
|
| 81 |
+
"val_dataset": "refcoco|unc|testB",
|
| 82 |
+
"epoch": 10.0,
|
| 83 |
+
"eval_giou": 0.7510504722595215,
|
| 84 |
+
"eval_ciou": 0.7533969879150391
|
| 85 |
+
},
|
| 86 |
+
{
|
| 87 |
+
"val_dataset": "refcoco+|unc|val",
|
| 88 |
+
"epoch": 10.0,
|
| 89 |
+
"eval_giou": 0.7322676181793213,
|
| 90 |
+
"eval_ciou": 0.727592408657074
|
| 91 |
+
},
|
| 92 |
+
{
|
| 93 |
+
"val_dataset": "refcoco+|unc|testA",
|
| 94 |
+
"epoch": 10.0,
|
| 95 |
+
"eval_giou": 0.7752026915550232,
|
| 96 |
+
"eval_ciou": 0.7760695219039917
|
| 97 |
+
},
|
| 98 |
+
{
|
| 99 |
+
"val_dataset": "refcoco+|unc|testB",
|
| 100 |
+
"epoch": 10.0,
|
| 101 |
+
"eval_giou": 0.6822892427444458,
|
| 102 |
+
"eval_ciou": 0.67359459400177
|
| 103 |
+
},
|
| 104 |
+
{
|
| 105 |
+
"val_dataset": "refcocog|umd|test",
|
| 106 |
+
"epoch": 10.0,
|
| 107 |
+
"eval_giou": 0.7448171973228455,
|
| 108 |
+
"eval_ciou": 0.7525338530540466
|
| 109 |
+
},
|
| 110 |
+
{
|
| 111 |
+
"val_dataset": "refcocog|umd|val",
|
| 112 |
+
"epoch": 10.0,
|
| 113 |
+
"eval_giou": 0.7451841235160828,
|
| 114 |
+
"eval_ciou": 0.7531925439834595
|
| 115 |
+
}
|
| 116 |
+
]
|
lisa-ivl3-2b_nr3_122_vlorati_sr/events.out.tfevents.1759309189.bask-pg0308u29a.2492715.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:742dec7da1d819b4b47b6b557d7c0265011f67a5b2fb8837f2e59ea2e2f5c5b3
|
| 3 |
+
size 88
|
lisa-ivl3-2b_nr3_122_vlorati_sr/events.out.tfevents.1759309262.bask-pg0308u29a.2496177.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bad55cd273aecdd44863bd2d637e019c83e3779f07c25f3f53358f367865772f
|
| 3 |
+
size 88
|