UltraDoughnut committed on
Commit
da7e2c3
·
verified ·
1 Parent(s): c9fb745

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. lisa-ivl2-2b_aati_sr/ckpt_model/config.json +87 -0
  2. lisa-ivl2-2b_aati_sr/ckpt_model/model.safetensors +3 -0
  3. lisa-ivl2-2b_aati_sr/ckpt_model/training_args.bin +3 -0
  4. lisa-ivl2-2b_aati_sr/evaluation_metrics.json +176 -0
  5. lisa-ivl2-2b_aati_sr/events.out.tfevents.1759176485.bask-pg0308u25a.324870.0 +3 -0
  6. lisa-ivl2-2b_aati_sr/events.out.tfevents.1759176613.bask-pg0308u25a.335258.0 +3 -0
  7. lisa-ivl2-2b_aati_sr/runs/Sep29_21-10-10_bask-pg0308u25a/events.out.tfevents.1759176696.bask-pg0308u25a.335258.1 +3 -0
  8. lisa-ivl2-2b_aati_sr/runs/Sep29_21-10-10_bask-pg0308u25a/events.out.tfevents.1759226831.bask-pg0308u25a.335258.2 +3 -0
  9. lisa-ivl3-2b_aati_sr/ckpt_model/config.json +143 -0
  10. lisa-ivl3-2b_aati_sr/ckpt_model/model.safetensors +3 -0
  11. lisa-ivl3-2b_aati_sr/ckpt_model/training_args.bin +3 -0
  12. lisa-ivl3-2b_aati_sr/evaluation_metrics.json +182 -0
  13. lisa-ivl3-2b_aati_sr/events.out.tfevents.1759004643.bask-pg0308u25a.2921884.0 +3 -0
  14. lisa-ivl3-2b_aati_sr/events.out.tfevents.1759097339.bask-pg0308u25a.2052782.0 +3 -0
  15. lisa-ivl3-2b_aati_sr/events.out.tfevents.1759097469.bask-pg0308u25a.2060658.0 +3 -0
  16. lisa-ivl3-2b_aati_sr/runs/Sep27_21-24-01_bask-pg0308u25a/events.out.tfevents.1759004710.bask-pg0308u25a.2921884.1 +3 -0
  17. lisa-ivl3-2b_aati_sr/runs/Sep28_23-08-55_bask-pg0308u25a/events.out.tfevents.1759097412.bask-pg0308u25a.2052782.1 +3 -0
  18. lisa-ivl3-2b_aati_sr/runs/Sep28_23-11-05_bask-pg0308u25a/events.out.tfevents.1759097532.bask-pg0308u25a.2060658.1 +3 -0
  19. lisa-ivl3-2b_aati_sr/runs/Sep28_23-11-05_bask-pg0308u25a/events.out.tfevents.1759137768.bask-pg0308u25a.2060658.2 +3 -0
  20. lisa-ivl3-2b_aati_sr_bs5acu8e20/ckpt_model/config.json +143 -0
  21. lisa-ivl3-2b_aati_sr_bs5acu8e20/ckpt_model/model.safetensors +3 -0
  22. lisa-ivl3-2b_aati_sr_bs5acu8e20/ckpt_model/training_args.bin +3 -0
  23. lisa-ivl3-2b_aati_sr_bs5acu8e20/evaluation_metrics.json +176 -0
  24. lisa-ivl3-2b_aati_sr_bs5acu8e20/events.out.tfevents.1759318805.bask-pg0309u15a.1769939.0 +3 -0
  25. lisa-ivl3-2b_aati_sr_bs5acu8e20/runs/Oct01_12-39-59_bask-pg0309u15a/events.out.tfevents.1759318879.bask-pg0309u15a.1769939.1 +3 -0
  26. lisa-ivl3-2b_aati_sr_bs5acu8e20/runs/Oct01_12-39-59_bask-pg0309u15a/events.out.tfevents.1759401820.bask-pg0309u15a.1769939.2 +3 -0
  27. lisa-ivl3-2b_nr2_vlorati_sr/ckpt_model/config.json +143 -0
  28. lisa-ivl3-2b_nr2_vlorati_sr/ckpt_model/model.safetensors +3 -0
  29. lisa-ivl3-2b_nr2_vlorati_sr/ckpt_model/training_args.bin +3 -0
  30. lisa-ivl3-2b_nr2_vlorati_sr/evaluation_metrics.json +116 -0
  31. lisa-ivl3-2b_nr2_vlorati_sr/events.out.tfevents.1759275794.bask-pg0309u06a.3124946.0 +3 -0
  32. lisa-ivl3-2b_nr2_vlorati_sr/runs/Oct01_00-43-11_bask-pg0309u06a/events.out.tfevents.1759275884.bask-pg0309u06a.3124946.1 +3 -0
  33. lisa-ivl3-2b_nr2_vlorati_sr/runs/Oct01_00-43-11_bask-pg0309u06a/events.out.tfevents.1759322723.bask-pg0309u06a.3124946.2 +3 -0
  34. lisa-ivl3-2b_nr3_122_2_vlorati_sr/ckpt_model/config.json +143 -0
  35. lisa-ivl3-2b_nr3_122_2_vlorati_sr/ckpt_model/model.safetensors +3 -0
  36. lisa-ivl3-2b_nr3_122_2_vlorati_sr/ckpt_model/training_args.bin +3 -0
  37. lisa-ivl3-2b_nr3_122_2_vlorati_sr/evaluation_metrics.json +116 -0
  38. lisa-ivl3-2b_nr3_122_2_vlorati_sr/events.out.tfevents.1759337142.bask-pg0308u25a.3571287.0 +3 -0
  39. lisa-ivl3-2b_nr3_122_2_vlorati_sr/events.out.tfevents.1759337476.bask-pg0308u25a.3578336.0 +3 -0
  40. lisa-ivl3-2b_nr3_122_2_vlorati_sr/events.out.tfevents.1759337801.bask-pg0308u25a.3585526.0 +3 -0
  41. lisa-ivl3-2b_nr3_122_2_vlorati_sr/runs/Oct01_17-45-37_bask-pg0308u25a/events.out.tfevents.1759337227.bask-pg0308u25a.3571287.1 +3 -0
  42. lisa-ivl3-2b_nr3_122_2_vlorati_sr/runs/Oct01_17-51-12_bask-pg0308u25a/events.out.tfevents.1759337560.bask-pg0308u25a.3578336.1 +3 -0
  43. lisa-ivl3-2b_nr3_122_2_vlorati_sr/runs/Oct01_17-56-37_bask-pg0308u25a/events.out.tfevents.1759337887.bask-pg0308u25a.3585526.1 +3 -0
  44. lisa-ivl3-2b_nr3_122_2_vlorati_sr/runs/Oct01_17-56-37_bask-pg0308u25a/events.out.tfevents.1759363414.bask-pg0308u25a.3585526.2 +3 -0
  45. lisa-ivl3-2b_nr3_122_vlorati_sr/ckpt_model/config.json +143 -0
  46. lisa-ivl3-2b_nr3_122_vlorati_sr/ckpt_model/model.safetensors +3 -0
  47. lisa-ivl3-2b_nr3_122_vlorati_sr/ckpt_model/training_args.bin +3 -0
  48. lisa-ivl3-2b_nr3_122_vlorati_sr/evaluation_metrics.json +116 -0
  49. lisa-ivl3-2b_nr3_122_vlorati_sr/events.out.tfevents.1759309189.bask-pg0308u29a.2492715.0 +3 -0
  50. lisa-ivl3-2b_nr3_122_vlorati_sr/events.out.tfevents.1759309262.bask-pg0308u29a.2496177.0 +3 -0
lisa-ivl2-2b_aati_sr/ckpt_model/config.json ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "InternVL3Self"
4
+ ],
5
+ "auto_map": {
6
+ "AutoConfig": "configuration_internvl_chat.InternVLChatConfig",
7
+ "AutoModel": "modeling_internvl_chat.InternVLChatModel",
8
+ "AutoModelForCausalLM": "modeling_internvl_chat.InternVLChatModel"
9
+ },
10
+ "downsample_ratio": 0.5,
11
+ "dtype": "bfloat16",
12
+ "dynamic_image_size": true,
13
+ "eos_token_id": 2,
14
+ "force_image_size": 448,
15
+ "llm_config": {
16
+ "_name_or_path": "internlm/internlm2-chat-1_8b",
17
+ "architectures": [
18
+ "InternLM2ForCausalLM"
19
+ ],
20
+ "attn_implementation": "eager",
21
+ "auto_map": {
22
+ "AutoConfig": "configuration_internlm2.InternLM2Config",
23
+ "AutoModel": "modeling_internlm2.InternLM2ForCausalLM",
24
+ "AutoModelForCausalLM": "modeling_internlm2.InternLM2ForCausalLM"
25
+ },
26
+ "bias": false,
27
+ "dtype": "bfloat16",
28
+ "hidden_act": "silu",
29
+ "hidden_size": 2048,
30
+ "initializer_range": 0.02,
31
+ "intermediate_size": 8192,
32
+ "max_position_embeddings": 32768,
33
+ "model_type": "internlm2",
34
+ "num_attention_heads": 16,
35
+ "num_hidden_layers": 24,
36
+ "num_key_value_heads": 8,
37
+ "pad_token_id": 2,
38
+ "rms_norm_eps": 1e-05,
39
+ "rope_scaling": {
40
+ "factor": 2.0,
41
+ "type": "dynamic"
42
+ },
43
+ "rope_theta": 1000000,
44
+ "use_bfloat16": true,
45
+ "use_cache": true,
46
+ "vocab_size": 92555
47
+ },
48
+ "max_dynamic_patch": 12,
49
+ "min_dynamic_patch": 1,
50
+ "model_type": "internvl_chat",
51
+ "output_attentions": false,
52
+ "pad_token_id": 2,
53
+ "ps_version": "v2",
54
+ "select_layer": -1,
55
+ "template": "internlm2-chat",
56
+ "tie_word_embeddings": false,
57
+ "transformers_version": null,
58
+ "use_backbone_lora": 0,
59
+ "use_llm_lora": 0,
60
+ "use_thumbnail": true,
61
+ "vision_config": {
62
+ "architectures": [
63
+ "InternVisionModel"
64
+ ],
65
+ "attention_dropout": 0.0,
66
+ "drop_path_rate": 0.0,
67
+ "dropout": 0.0,
68
+ "dtype": "bfloat16",
69
+ "hidden_act": "gelu",
70
+ "hidden_size": 1024,
71
+ "image_size": 448,
72
+ "initializer_factor": 1.0,
73
+ "initializer_range": 0.02,
74
+ "intermediate_size": 4096,
75
+ "layer_norm_eps": 1e-06,
76
+ "model_type": "intern_vit_6b",
77
+ "norm_type": "layer_norm",
78
+ "num_attention_heads": 16,
79
+ "num_channels": 3,
80
+ "num_hidden_layers": 24,
81
+ "patch_size": 14,
82
+ "qk_normalization": false,
83
+ "qkv_bias": true,
84
+ "use_bfloat16": true,
85
+ "use_flash_attn": true
86
+ }
87
+ }
lisa-ivl2-2b_aati_sr/ckpt_model/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db228d0f6958ce4561ad6c7b804104e6611700346e0490d1a409bdae9a01ee37
3
+ size 4470345504
lisa-ivl2-2b_aati_sr/ckpt_model/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af5483cf43ae9904b7a17b8cfc6f37988cd9b04cff3dd97add6d7329ce105964
3
+ size 7352
lisa-ivl2-2b_aati_sr/evaluation_metrics.json ADDED
@@ -0,0 +1,176 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "val_dataset": "ReasonSeg|val",
4
+ "epoch": 1.0,
5
+ "eval_giou": 0.4299730360507965,
6
+ "eval_ciou": 0.5639147162437439
7
+ },
8
+ {
9
+ "val_dataset": "ReasonSeg|val",
10
+ "epoch": 2.0,
11
+ "eval_giou": 0.4750661551952362,
12
+ "eval_ciou": 0.49450477957725525
13
+ },
14
+ {
15
+ "val_dataset": "ReasonSeg|val",
16
+ "epoch": 3.0,
17
+ "eval_giou": 0.5029056668281555,
18
+ "eval_ciou": 0.5545530319213867
19
+ },
20
+ {
21
+ "val_dataset": "ReasonSeg|val",
22
+ "epoch": 4.0,
23
+ "eval_giou": 0.5038564205169678,
24
+ "eval_ciou": 0.5760622024536133
25
+ },
26
+ {
27
+ "val_dataset": "ReasonSeg|val",
28
+ "epoch": 5.0,
29
+ "eval_giou": 0.4931972026824951,
30
+ "eval_ciou": 0.5593799352645874
31
+ },
32
+ {
33
+ "val_dataset": "ReasonSeg|val",
34
+ "epoch": 6.0,
35
+ "eval_giou": 0.5129929780960083,
36
+ "eval_ciou": 0.5596947073936462
37
+ },
38
+ {
39
+ "val_dataset": "ReasonSeg|val",
40
+ "epoch": 7.0,
41
+ "eval_giou": 0.5289523005485535,
42
+ "eval_ciou": 0.5600836873054504
43
+ },
44
+ {
45
+ "val_dataset": "ReasonSeg|val",
46
+ "epoch": 8.0,
47
+ "eval_giou": 0.5335568785667419,
48
+ "eval_ciou": 0.5150668621063232
49
+ },
50
+ {
51
+ "val_dataset": "ReasonSeg|val",
52
+ "epoch": 9.0,
53
+ "eval_giou": 0.5301417708396912,
54
+ "eval_ciou": 0.6077501773834229
55
+ },
56
+ {
57
+ "val_dataset": "ReasonSeg|val",
58
+ "epoch": 10.0,
59
+ "eval_giou": 0.5494945049285889,
60
+ "eval_ciou": 0.5914241075515747
61
+ },
62
+ {
63
+ "val_dataset": "ReasonSeg|val",
64
+ "epoch": 11.0,
65
+ "eval_giou": 0.5493737459182739,
66
+ "eval_ciou": 0.6159847974777222
67
+ },
68
+ {
69
+ "val_dataset": "ReasonSeg|val",
70
+ "epoch": 12.0,
71
+ "eval_giou": 0.5445547699928284,
72
+ "eval_ciou": 0.5939457416534424
73
+ },
74
+ {
75
+ "val_dataset": "ReasonSeg|val",
76
+ "epoch": 13.0,
77
+ "eval_giou": 0.5355852842330933,
78
+ "eval_ciou": 0.5724180936813354
79
+ },
80
+ {
81
+ "val_dataset": "ReasonSeg|val",
82
+ "epoch": 14.0,
83
+ "eval_giou": 0.5353021621704102,
84
+ "eval_ciou": 0.5821977257728577
85
+ },
86
+ {
87
+ "val_dataset": "ReasonSeg|val",
88
+ "epoch": 15.0,
89
+ "eval_giou": 0.5353786945343018,
90
+ "eval_ciou": 0.5801292061805725
91
+ },
92
+ {
93
+ "val_dataset": "ReasonSeg|val",
94
+ "epoch": 16.0,
95
+ "eval_giou": 0.5340729355812073,
96
+ "eval_ciou": 0.577383816242218
97
+ },
98
+ {
99
+ "val_dataset": "ReasonSeg|val",
100
+ "epoch": 17.0,
101
+ "eval_giou": 0.5309929251670837,
102
+ "eval_ciou": 0.5779208540916443
103
+ },
104
+ {
105
+ "val_dataset": "ReasonSeg|val",
106
+ "epoch": 18.0,
107
+ "eval_giou": 0.5346851944923401,
108
+ "eval_ciou": 0.5808385014533997
109
+ },
110
+ {
111
+ "val_dataset": "ReasonSeg|val",
112
+ "epoch": 19.0,
113
+ "eval_giou": 0.5436327457427979,
114
+ "eval_ciou": 0.6044915318489075
115
+ },
116
+ {
117
+ "val_dataset": "ReasonSeg|val",
118
+ "epoch": 20.0,
119
+ "eval_giou": 0.5427136421203613,
120
+ "eval_ciou": 0.5983464121818542
121
+ },
122
+ {
123
+ "val_dataset": "ReasonSeg|test",
124
+ "epoch": 20.0,
125
+ "eval_giou": 0.534633219242096,
126
+ "eval_ciou": 0.5675567984580994
127
+ },
128
+ {
129
+ "val_dataset": "refcoco|unc|val",
130
+ "epoch": 20.0,
131
+ "eval_giou": 0.7851775288581848,
132
+ "eval_ciou": 0.7884453535079956
133
+ },
134
+ {
135
+ "val_dataset": "refcoco|unc|testA",
136
+ "epoch": 20.0,
137
+ "eval_giou": 0.8090593218803406,
138
+ "eval_ciou": 0.8159176111221313
139
+ },
140
+ {
141
+ "val_dataset": "refcoco|unc|testB",
142
+ "epoch": 20.0,
143
+ "eval_giou": 0.7596240043640137,
144
+ "eval_ciou": 0.7600759863853455
145
+ },
146
+ {
147
+ "val_dataset": "refcoco+|unc|val",
148
+ "epoch": 20.0,
149
+ "eval_giou": 0.7349771857261658,
150
+ "eval_ciou": 0.7298452258110046
151
+ },
152
+ {
153
+ "val_dataset": "refcoco+|unc|testA",
154
+ "epoch": 20.0,
155
+ "eval_giou": 0.7746085524559021,
156
+ "eval_ciou": 0.7730916142463684
157
+ },
158
+ {
159
+ "val_dataset": "refcoco+|unc|testB",
160
+ "epoch": 20.0,
161
+ "eval_giou": 0.6849038004875183,
162
+ "eval_ciou": 0.6688467264175415
163
+ },
164
+ {
165
+ "val_dataset": "refcocog|umd|test",
166
+ "epoch": 20.0,
167
+ "eval_giou": 0.7478209733963013,
168
+ "eval_ciou": 0.7591495513916016
169
+ },
170
+ {
171
+ "val_dataset": "refcocog|umd|val",
172
+ "epoch": 20.0,
173
+ "eval_giou": 0.7427729368209839,
174
+ "eval_ciou": 0.7514315247535706
175
+ }
176
+ ]
lisa-ivl2-2b_aati_sr/events.out.tfevents.1759176485.bask-pg0308u25a.324870.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ff85d72042556023246781cc0c4c66d6f8d335407401c64599dac3ce77e8fe1
3
+ size 88
lisa-ivl2-2b_aati_sr/events.out.tfevents.1759176613.bask-pg0308u25a.335258.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a321f870b68b8ec8959825f0bbdcc8fb471750bfc88c5d4aa6bbe242a19fcb1b
3
+ size 419822
lisa-ivl2-2b_aati_sr/runs/Sep29_21-10-10_bask-pg0308u25a/events.out.tfevents.1759176696.bask-pg0308u25a.335258.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:76214c162c6cd38819fd854b3631bd4d684e2e40a92bb87ddcbd7dd42ba28bff
3
+ size 221855
lisa-ivl2-2b_aati_sr/runs/Sep29_21-10-10_bask-pg0308u25a/events.out.tfevents.1759226831.bask-pg0308u25a.335258.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8496bdaa0d7e4565e1a1a41424515e0446d9afa01b648d87e80bb325a7286963
3
+ size 1402
lisa-ivl3-2b_aati_sr/ckpt_model/config.json ADDED
@@ -0,0 +1,143 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "InternVL3Self"
4
+ ],
5
+ "auto_map": {
6
+ "AutoConfig": "configuration_internvl_chat.InternVLChatConfig",
7
+ "AutoModel": "modeling_internvl_chat.InternVLChatModel",
8
+ "AutoModelForCausalLM": "modeling_internvl_chat.InternVLChatModel"
9
+ },
10
+ "downsample_ratio": 0.5,
11
+ "dtype": "bfloat16",
12
+ "dynamic_image_size": true,
13
+ "eos_token_id": 151645,
14
+ "force_image_size": 448,
15
+ "hidden_size": 1536,
16
+ "image_fold": null,
17
+ "llm_config": {
18
+ "_attn_implementation_autoset": true,
19
+ "_name_or_path": "./pretrained/Qwen2.5-32B-Instruct",
20
+ "architectures": [
21
+ "Qwen2ForCausalLM"
22
+ ],
23
+ "attention_dropout": 0.0,
24
+ "bos_token_id": 151643,
25
+ "dtype": "bfloat16",
26
+ "eos_token_id": 151643,
27
+ "hidden_act": "silu",
28
+ "hidden_size": 1536,
29
+ "initializer_range": 0.02,
30
+ "intermediate_size": 8960,
31
+ "layer_types": [
32
+ "full_attention",
33
+ "full_attention",
34
+ "full_attention",
35
+ "full_attention",
36
+ "full_attention",
37
+ "full_attention",
38
+ "full_attention",
39
+ "full_attention",
40
+ "full_attention",
41
+ "full_attention",
42
+ "full_attention",
43
+ "full_attention",
44
+ "full_attention",
45
+ "full_attention",
46
+ "full_attention",
47
+ "full_attention",
48
+ "full_attention",
49
+ "full_attention",
50
+ "full_attention",
51
+ "full_attention",
52
+ "full_attention",
53
+ "full_attention",
54
+ "full_attention",
55
+ "full_attention",
56
+ "full_attention",
57
+ "full_attention",
58
+ "full_attention",
59
+ "full_attention"
60
+ ],
61
+ "max_position_embeddings": 32768,
62
+ "max_window_layers": 70,
63
+ "model_type": "qwen2",
64
+ "moe_config": null,
65
+ "num_attention_heads": 12,
66
+ "num_hidden_layers": 28,
67
+ "num_key_value_heads": 2,
68
+ "rms_norm_eps": 1e-06,
69
+ "rope_scaling": {
70
+ "factor": 2.0,
71
+ "rope_type": "dynamic",
72
+ "type": "dynamic"
73
+ },
74
+ "rope_theta": 1000000.0,
75
+ "sliding_window": null,
76
+ "use_bfloat16": true,
77
+ "use_cache": false,
78
+ "use_sliding_window": false,
79
+ "vocab_size": 151676
80
+ },
81
+ "max_dynamic_patch": 12,
82
+ "min_dynamic_patch": 1,
83
+ "model_type": "internvl_chat",
84
+ "output_attentions": false,
85
+ "pad2square": false,
86
+ "pad_token_id": 151643,
87
+ "ps_version": "v2",
88
+ "select_layer": -1,
89
+ "system_message": null,
90
+ "template": "internvl2_5",
91
+ "tie_word_embeddings": false,
92
+ "transformers_version": null,
93
+ "use_backbone_lora": 0,
94
+ "use_llm_lora": 0,
95
+ "use_thumbnail": true,
96
+ "vision_config": {
97
+ "_attn_implementation_autoset": true,
98
+ "_name_or_path": "OpenGVLab/InternViT-6B-448px-V1-5",
99
+ "architectures": [
100
+ "InternVisionModel"
101
+ ],
102
+ "attention_dropout": 0.0,
103
+ "auto_map": {
104
+ "AutoConfig": "configuration_intern_vit.InternVisionConfig",
105
+ "AutoModel": "modeling_intern_vit.InternVisionModel"
106
+ },
107
+ "capacity_factor": 1.2,
108
+ "drop_path_rate": 0.1,
109
+ "dropout": 0.0,
110
+ "dtype": "bfloat16",
111
+ "eval_capacity_factor": 1.4,
112
+ "hidden_act": "gelu",
113
+ "hidden_size": 1024,
114
+ "image_size": 448,
115
+ "initializer_factor": 0.1,
116
+ "initializer_range": 1e-10,
117
+ "intermediate_size": 4096,
118
+ "laux_allreduce": "all_nodes",
119
+ "layer_norm_eps": 1e-06,
120
+ "model_type": "intern_vit_6b",
121
+ "moe_coeff_ratio": 0.5,
122
+ "moe_intermediate_size": 768,
123
+ "moe_output_scale": 4.0,
124
+ "noisy_gate_policy": "RSample_before",
125
+ "norm_type": "layer_norm",
126
+ "num_attention_heads": 16,
127
+ "num_channels": 3,
128
+ "num_experts": 8,
129
+ "num_hidden_layers": 24,
130
+ "num_routed_experts": 4,
131
+ "num_shared_experts": 4,
132
+ "patch_size": 14,
133
+ "qk_normalization": false,
134
+ "qkv_bias": true,
135
+ "shared_expert_intermediate_size": 3072,
136
+ "use_bfloat16": true,
137
+ "use_flash_attn": true,
138
+ "use_moe": false,
139
+ "use_residual": true,
140
+ "use_rts": false,
141
+ "use_weighted_residual": false
142
+ }
143
+ }
lisa-ivl3-2b_aati_sr/ckpt_model/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7b8756ec9fa65a596154e58c2c8ffeeade5400bcf4af3f3807b310dc211a352f
3
+ size 4211070232
lisa-ivl3-2b_aati_sr/ckpt_model/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a32ca3f97667c68df088ddf7ee12d0dc27112557a472d11076fc1e7cb4fada87
3
+ size 7352
lisa-ivl3-2b_aati_sr/evaluation_metrics.json ADDED
@@ -0,0 +1,182 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "val_dataset": "ReasonSeg|val",
4
+ "epoch": 1.0,
5
+ "eval_giou": 0.5756632685661316,
6
+ "eval_ciou": 0.6737087965011597
7
+ },
8
+ {
9
+ "val_dataset": "ReasonSeg|val",
10
+ "epoch": 2.0,
11
+ "eval_giou": 0.5750120282173157,
12
+ "eval_ciou": 0.6774965524673462
13
+ },
14
+ {
15
+ "val_dataset": "ReasonSeg|val",
16
+ "epoch": 3.0,
17
+ "eval_giou": 0.5997360348701477,
18
+ "eval_ciou": 0.6924350261688232
19
+ },
20
+ {
21
+ "val_dataset": "ReasonSeg|val",
22
+ "epoch": 3.0,
23
+ "eval_giou": 0.5967223048210144,
24
+ "eval_ciou": 0.6778789162635803
25
+ },
26
+ {
27
+ "val_dataset": "ReasonSeg|val",
28
+ "epoch": 4.0,
29
+ "eval_giou": 0.5993068218231201,
30
+ "eval_ciou": 0.6605137586593628
31
+ },
32
+ {
33
+ "val_dataset": "ReasonSeg|val",
34
+ "epoch": 5.0,
35
+ "eval_giou": 0.5851569175720215,
36
+ "eval_ciou": 0.6708498597145081
37
+ },
38
+ {
39
+ "val_dataset": "ReasonSeg|val",
40
+ "epoch": 6.0,
41
+ "eval_giou": 0.5863112211227417,
42
+ "eval_ciou": 0.691616415977478
43
+ },
44
+ {
45
+ "val_dataset": "ReasonSeg|val",
46
+ "epoch": 7.0,
47
+ "eval_giou": 0.5981602668762207,
48
+ "eval_ciou": 0.6373696327209473
49
+ },
50
+ {
51
+ "val_dataset": "ReasonSeg|val",
52
+ "epoch": 8.0,
53
+ "eval_giou": 0.5868176221847534,
54
+ "eval_ciou": 0.6186509728431702
55
+ },
56
+ {
57
+ "val_dataset": "ReasonSeg|val",
58
+ "epoch": 9.0,
59
+ "eval_giou": 0.599888026714325,
60
+ "eval_ciou": 0.6218949556350708
61
+ },
62
+ {
63
+ "val_dataset": "ReasonSeg|val",
64
+ "epoch": 10.0,
65
+ "eval_giou": 0.5966016054153442,
66
+ "eval_ciou": 0.6475383639335632
67
+ },
68
+ {
69
+ "val_dataset": "ReasonSeg|val",
70
+ "epoch": 11.0,
71
+ "eval_giou": 0.599604070186615,
72
+ "eval_ciou": 0.6404339075088501
73
+ },
74
+ {
75
+ "val_dataset": "ReasonSeg|val",
76
+ "epoch": 12.0,
77
+ "eval_giou": 0.6057789325714111,
78
+ "eval_ciou": 0.6498401165008545
79
+ },
80
+ {
81
+ "val_dataset": "ReasonSeg|val",
82
+ "epoch": 13.0,
83
+ "eval_giou": 0.6040271520614624,
84
+ "eval_ciou": 0.6116575002670288
85
+ },
86
+ {
87
+ "val_dataset": "ReasonSeg|val",
88
+ "epoch": 14.0,
89
+ "eval_giou": 0.60584956407547,
90
+ "eval_ciou": 0.6290444135665894
91
+ },
92
+ {
93
+ "val_dataset": "ReasonSeg|val",
94
+ "epoch": 15.0,
95
+ "eval_giou": 0.6188424229621887,
96
+ "eval_ciou": 0.6753682494163513
97
+ },
98
+ {
99
+ "val_dataset": "ReasonSeg|val",
100
+ "epoch": 16.0,
101
+ "eval_giou": 0.6140751242637634,
102
+ "eval_ciou": 0.658414900302887
103
+ },
104
+ {
105
+ "val_dataset": "ReasonSeg|val",
106
+ "epoch": 17.0,
107
+ "eval_giou": 0.6123270988464355,
108
+ "eval_ciou": 0.6641766428947449
109
+ },
110
+ {
111
+ "val_dataset": "ReasonSeg|val",
112
+ "epoch": 18.0,
113
+ "eval_giou": 0.619356095790863,
114
+ "eval_ciou": 0.6501895785331726
115
+ },
116
+ {
117
+ "val_dataset": "ReasonSeg|val",
118
+ "epoch": 19.0,
119
+ "eval_giou": 0.6187554597854614,
120
+ "eval_ciou": 0.6563228964805603
121
+ },
122
+ {
123
+ "val_dataset": "ReasonSeg|val",
124
+ "epoch": 20.0,
125
+ "eval_giou": 0.6174865961074829,
126
+ "eval_ciou": 0.6626467108726501
127
+ },
128
+ {
129
+ "val_dataset": "ReasonSeg|test",
130
+ "epoch": 20.0,
131
+ "eval_giou": 0.6232897043228149,
132
+ "eval_ciou": 0.6276214718818665
133
+ },
134
+ {
135
+ "val_dataset": "refcoco|unc|val",
136
+ "epoch": 20.0,
137
+ "eval_giou": 0.8062067627906799,
138
+ "eval_ciou": 0.810879111289978
139
+ },
140
+ {
141
+ "val_dataset": "refcoco|unc|testA",
142
+ "epoch": 20.0,
143
+ "eval_giou": 0.8242553472518921,
144
+ "eval_ciou": 0.8299362659454346
145
+ },
146
+ {
147
+ "val_dataset": "refcoco|unc|testB",
148
+ "epoch": 20.0,
149
+ "eval_giou": 0.7838391661643982,
150
+ "eval_ciou": 0.786938488483429
151
+ },
152
+ {
153
+ "val_dataset": "refcoco+|unc|val",
154
+ "epoch": 20.0,
155
+ "eval_giou": 0.7634923458099365,
156
+ "eval_ciou": 0.7587481737136841
157
+ },
158
+ {
159
+ "val_dataset": "refcoco+|unc|testA",
160
+ "epoch": 20.0,
161
+ "eval_giou": 0.8028817176818848,
162
+ "eval_ciou": 0.8037988543510437
163
+ },
164
+ {
165
+ "val_dataset": "refcoco+|unc|testB",
166
+ "epoch": 20.0,
167
+ "eval_giou": 0.7256011366844177,
168
+ "eval_ciou": 0.7161976099014282
169
+ },
170
+ {
171
+ "val_dataset": "refcocog|umd|test",
172
+ "epoch": 20.0,
173
+ "eval_giou": 0.7690672278404236,
174
+ "eval_ciou": 0.7782297134399414
175
+ },
176
+ {
177
+ "val_dataset": "refcocog|umd|val",
178
+ "epoch": 20.0,
179
+ "eval_giou": 0.7630250453948975,
180
+ "eval_ciou": 0.7679163217544556
181
+ }
182
+ ]
lisa-ivl3-2b_aati_sr/events.out.tfevents.1759004643.bask-pg0308u25a.2921884.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e6060d551386abeeef58b5d85a5781e838437a3f4f15aae1bb56735d9d87c995
3
+ size 62245
lisa-ivl3-2b_aati_sr/events.out.tfevents.1759097339.bask-pg0308u25a.2052782.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff02795f8e9f35bda1236632a6fd2dfb546ccfaf86f99081ac0ce7795a3ab1e6
3
+ size 88
lisa-ivl3-2b_aati_sr/events.out.tfevents.1759097469.bask-pg0308u25a.2060658.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dde771411e16dac8b9ec7ecbdd1d1d5df1d90e4abc4928f2147f896093f43774
3
+ size 378412
lisa-ivl3-2b_aati_sr/runs/Sep27_21-24-01_bask-pg0308u25a/events.out.tfevents.1759004710.bask-pg0308u25a.2921884.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b8b24ce4e45eabeb0c6fc91a2b8bc9dcede990e16d18b17e8744d98c220d93e0
3
+ size 41157
lisa-ivl3-2b_aati_sr/runs/Sep28_23-08-55_bask-pg0308u25a/events.out.tfevents.1759097412.bask-pg0308u25a.2052782.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a2802bbb94769223c74bd2a3406fa8febce2e7f72e7795feea4c08b55409e34
3
+ size 9116
lisa-ivl3-2b_aati_sr/runs/Sep28_23-11-05_bask-pg0308u25a/events.out.tfevents.1759097532.bask-pg0308u25a.2060658.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:236a74e7a10f5744921f0f493a18f2ed79694ab8ee3f9f0567da35a7ae00fe7a
3
+ size 201997
lisa-ivl3-2b_aati_sr/runs/Sep28_23-11-05_bask-pg0308u25a/events.out.tfevents.1759137768.bask-pg0308u25a.2060658.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8507b72c7c21b75d4e0b67b9ba7029d67536808b9ddc221aff9db173755b683c
3
+ size 1402
lisa-ivl3-2b_aati_sr_bs5acu8e20/ckpt_model/config.json ADDED
@@ -0,0 +1,143 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "InternVL3Self"
4
+ ],
5
+ "auto_map": {
6
+ "AutoConfig": "configuration_internvl_chat.InternVLChatConfig",
7
+ "AutoModel": "modeling_internvl_chat.InternVLChatModel",
8
+ "AutoModelForCausalLM": "modeling_internvl_chat.InternVLChatModel"
9
+ },
10
+ "downsample_ratio": 0.5,
11
+ "dtype": "bfloat16",
12
+ "dynamic_image_size": true,
13
+ "eos_token_id": 151645,
14
+ "force_image_size": 448,
15
+ "hidden_size": 1536,
16
+ "image_fold": null,
17
+ "llm_config": {
18
+ "_attn_implementation_autoset": true,
19
+ "_name_or_path": "./pretrained/Qwen2.5-32B-Instruct",
20
+ "architectures": [
21
+ "Qwen2ForCausalLM"
22
+ ],
23
+ "attention_dropout": 0.0,
24
+ "bos_token_id": 151643,
25
+ "dtype": "bfloat16",
26
+ "eos_token_id": 151643,
27
+ "hidden_act": "silu",
28
+ "hidden_size": 1536,
29
+ "initializer_range": 0.02,
30
+ "intermediate_size": 8960,
31
+ "layer_types": [
32
+ "full_attention",
33
+ "full_attention",
34
+ "full_attention",
35
+ "full_attention",
36
+ "full_attention",
37
+ "full_attention",
38
+ "full_attention",
39
+ "full_attention",
40
+ "full_attention",
41
+ "full_attention",
42
+ "full_attention",
43
+ "full_attention",
44
+ "full_attention",
45
+ "full_attention",
46
+ "full_attention",
47
+ "full_attention",
48
+ "full_attention",
49
+ "full_attention",
50
+ "full_attention",
51
+ "full_attention",
52
+ "full_attention",
53
+ "full_attention",
54
+ "full_attention",
55
+ "full_attention",
56
+ "full_attention",
57
+ "full_attention",
58
+ "full_attention",
59
+ "full_attention"
60
+ ],
61
+ "max_position_embeddings": 32768,
62
+ "max_window_layers": 70,
63
+ "model_type": "qwen2",
64
+ "moe_config": null,
65
+ "num_attention_heads": 12,
66
+ "num_hidden_layers": 28,
67
+ "num_key_value_heads": 2,
68
+ "rms_norm_eps": 1e-06,
69
+ "rope_scaling": {
70
+ "factor": 2.0,
71
+ "rope_type": "dynamic",
72
+ "type": "dynamic"
73
+ },
74
+ "rope_theta": 1000000.0,
75
+ "sliding_window": null,
76
+ "use_bfloat16": true,
77
+ "use_cache": false,
78
+ "use_sliding_window": false,
79
+ "vocab_size": 151676
80
+ },
81
+ "max_dynamic_patch": 12,
82
+ "min_dynamic_patch": 1,
83
+ "model_type": "internvl_chat",
84
+ "output_attentions": false,
85
+ "pad2square": false,
86
+ "pad_token_id": 151643,
87
+ "ps_version": "v2",
88
+ "select_layer": -1,
89
+ "system_message": null,
90
+ "template": "internvl2_5",
91
+ "tie_word_embeddings": false,
92
+ "transformers_version": null,
93
+ "use_backbone_lora": 0,
94
+ "use_llm_lora": 0,
95
+ "use_thumbnail": true,
96
+ "vision_config": {
97
+ "_attn_implementation_autoset": true,
98
+ "_name_or_path": "OpenGVLab/InternViT-6B-448px-V1-5",
99
+ "architectures": [
100
+ "InternVisionModel"
101
+ ],
102
+ "attention_dropout": 0.0,
103
+ "auto_map": {
104
+ "AutoConfig": "configuration_intern_vit.InternVisionConfig",
105
+ "AutoModel": "modeling_intern_vit.InternVisionModel"
106
+ },
107
+ "capacity_factor": 1.2,
108
+ "drop_path_rate": 0.1,
109
+ "dropout": 0.0,
110
+ "dtype": "bfloat16",
111
+ "eval_capacity_factor": 1.4,
112
+ "hidden_act": "gelu",
113
+ "hidden_size": 1024,
114
+ "image_size": 448,
115
+ "initializer_factor": 0.1,
116
+ "initializer_range": 1e-10,
117
+ "intermediate_size": 4096,
118
+ "laux_allreduce": "all_nodes",
119
+ "layer_norm_eps": 1e-06,
120
+ "model_type": "intern_vit_6b",
121
+ "moe_coeff_ratio": 0.5,
122
+ "moe_intermediate_size": 768,
123
+ "moe_output_scale": 4.0,
124
+ "noisy_gate_policy": "RSample_before",
125
+ "norm_type": "layer_norm",
126
+ "num_attention_heads": 16,
127
+ "num_channels": 3,
128
+ "num_experts": 8,
129
+ "num_hidden_layers": 24,
130
+ "num_routed_experts": 4,
131
+ "num_shared_experts": 4,
132
+ "patch_size": 14,
133
+ "qk_normalization": false,
134
+ "qkv_bias": true,
135
+ "shared_expert_intermediate_size": 3072,
136
+ "use_bfloat16": true,
137
+ "use_flash_attn": true,
138
+ "use_moe": false,
139
+ "use_residual": true,
140
+ "use_rts": false,
141
+ "use_weighted_residual": false
142
+ }
143
+ }
lisa-ivl3-2b_aati_sr_bs5acu8e20/ckpt_model/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d38203e4873ce0d8abfd8a0527229991c202472124463253ad5706c15bfcca80
3
+ size 4211070232
lisa-ivl3-2b_aati_sr_bs5acu8e20/ckpt_model/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2a5858fec803a4d4d42cb4f8643322589d07cf0bbba9e7d456d7b801bbdca6c7
3
+ size 7352
lisa-ivl3-2b_aati_sr_bs5acu8e20/evaluation_metrics.json ADDED
@@ -0,0 +1,176 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "val_dataset": "ReasonSeg|val",
4
+ "epoch": 1.0,
5
+ "eval_giou": 0.5831701755523682,
6
+ "eval_ciou": 0.6626128554344177
7
+ },
8
+ {
9
+ "val_dataset": "ReasonSeg|val",
10
+ "epoch": 2.0,
11
+ "eval_giou": 0.5952411890029907,
12
+ "eval_ciou": 0.6626877188682556
13
+ },
14
+ {
15
+ "val_dataset": "ReasonSeg|val",
16
+ "epoch": 3.0,
17
+ "eval_giou": 0.6062884330749512,
18
+ "eval_ciou": 0.6667135953903198
19
+ },
20
+ {
21
+ "val_dataset": "ReasonSeg|val",
22
+ "epoch": 4.0,
23
+ "eval_giou": 0.6196147799491882,
24
+ "eval_ciou": 0.6437596678733826
25
+ },
26
+ {
27
+ "val_dataset": "ReasonSeg|val",
28
+ "epoch": 5.0,
29
+ "eval_giou": 0.6216316223144531,
30
+ "eval_ciou": 0.6733407974243164
31
+ },
32
+ {
33
+ "val_dataset": "ReasonSeg|val",
34
+ "epoch": 6.0,
35
+ "eval_giou": 0.6207277774810791,
36
+ "eval_ciou": 0.7004563808441162
37
+ },
38
+ {
39
+ "val_dataset": "ReasonSeg|val",
40
+ "epoch": 7.0,
41
+ "eval_giou": 0.6256881952285767,
42
+ "eval_ciou": 0.681186318397522
43
+ },
44
+ {
45
+ "val_dataset": "ReasonSeg|val",
46
+ "epoch": 8.0,
47
+ "eval_giou": 0.6138451099395752,
48
+ "eval_ciou": 0.6687238812446594
49
+ },
50
+ {
51
+ "val_dataset": "ReasonSeg|val",
52
+ "epoch": 9.0,
53
+ "eval_giou": 0.6257895231246948,
54
+ "eval_ciou": 0.6808822154998779
55
+ },
56
+ {
57
+ "val_dataset": "ReasonSeg|val",
58
+ "epoch": 10.0,
59
+ "eval_giou": 0.6295360326766968,
60
+ "eval_ciou": 0.6649029850959778
61
+ },
62
+ {
63
+ "val_dataset": "ReasonSeg|val",
64
+ "epoch": 11.0,
65
+ "eval_giou": 0.6255075931549072,
66
+ "eval_ciou": 0.672091543674469
67
+ },
68
+ {
69
+ "val_dataset": "ReasonSeg|val",
70
+ "epoch": 12.0,
71
+ "eval_giou": 0.6274581551551819,
72
+ "eval_ciou": 0.6651784181594849
73
+ },
74
+ {
75
+ "val_dataset": "ReasonSeg|val",
76
+ "epoch": 13.0,
77
+ "eval_giou": 0.6306081414222717,
78
+ "eval_ciou": 0.6813814640045166
79
+ },
80
+ {
81
+ "val_dataset": "ReasonSeg|val",
82
+ "epoch": 14.0,
83
+ "eval_giou": 0.6242629289627075,
84
+ "eval_ciou": 0.6436342000961304
85
+ },
86
+ {
87
+ "val_dataset": "ReasonSeg|val",
88
+ "epoch": 15.0,
89
+ "eval_giou": 0.636084794998169,
90
+ "eval_ciou": 0.6750655770301819
91
+ },
92
+ {
93
+ "val_dataset": "ReasonSeg|val",
94
+ "epoch": 16.0,
95
+ "eval_giou": 0.6268562078475952,
96
+ "eval_ciou": 0.6949459314346313
97
+ },
98
+ {
99
+ "val_dataset": "ReasonSeg|val",
100
+ "epoch": 17.0,
101
+ "eval_giou": 0.6331221461296082,
102
+ "eval_ciou": 0.6858609914779663
103
+ },
104
+ {
105
+ "val_dataset": "ReasonSeg|val",
106
+ "epoch": 18.0,
107
+ "eval_giou": 0.6354182958602905,
108
+ "eval_ciou": 0.6718393564224243
109
+ },
110
+ {
111
+ "val_dataset": "ReasonSeg|val",
112
+ "epoch": 19.0,
113
+ "eval_giou": 0.6372456550598145,
114
+ "eval_ciou": 0.6829223036766052
115
+ },
116
+ {
117
+ "val_dataset": "ReasonSeg|val",
118
+ "epoch": 20.0,
119
+ "eval_giou": 0.635535717010498,
120
+ "eval_ciou": 0.6737943291664124
121
+ },
122
+ {
123
+ "val_dataset": "ReasonSeg|test",
124
+ "epoch": 20.0,
125
+ "eval_giou": 0.6199853420257568,
126
+ "eval_ciou": 0.6205106973648071
127
+ },
128
+ {
129
+ "val_dataset": "refcoco|unc|val",
130
+ "epoch": 20.0,
131
+ "eval_giou": 0.8225948214530945,
132
+ "eval_ciou": 0.8271152377128601
133
+ },
134
+ {
135
+ "val_dataset": "refcoco|unc|testA",
136
+ "epoch": 20.0,
137
+ "eval_giou": 0.8378753066062927,
138
+ "eval_ciou": 0.843865156173706
139
+ },
140
+ {
141
+ "val_dataset": "refcoco|unc|testB",
142
+ "epoch": 20.0,
143
+ "eval_giou": 0.8056657910346985,
144
+ "eval_ciou": 0.8094556331634521
145
+ },
146
+ {
147
+ "val_dataset": "refcoco+|unc|val",
148
+ "epoch": 20.0,
149
+ "eval_giou": 0.7836799025535583,
150
+ "eval_ciou": 0.7731773853302002
151
+ },
152
+ {
153
+ "val_dataset": "refcoco+|unc|testA",
154
+ "epoch": 20.0,
155
+ "eval_giou": 0.8187907338142395,
156
+ "eval_ciou": 0.8201593160629272
157
+ },
158
+ {
159
+ "val_dataset": "refcoco+|unc|testB",
160
+ "epoch": 20.0,
161
+ "eval_giou": 0.7530681490898132,
162
+ "eval_ciou": 0.741623044013977
163
+ },
164
+ {
165
+ "val_dataset": "refcocog|umd|test",
166
+ "epoch": 20.0,
167
+ "eval_giou": 0.7875200510025024,
168
+ "eval_ciou": 0.7955977320671082
169
+ },
170
+ {
171
+ "val_dataset": "refcocog|umd|val",
172
+ "epoch": 20.0,
173
+ "eval_giou": 0.7873671650886536,
174
+ "eval_ciou": 0.7938516736030579
175
+ }
176
+ ]
lisa-ivl3-2b_aati_sr_bs5acu8e20/events.out.tfevents.1759318805.bask-pg0309u15a.1769939.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:67c2af395273417556689a5d08107ef7aa0428fa56a3865c50dd72904187edc3
3
+ size 419822
lisa-ivl3-2b_aati_sr_bs5acu8e20/runs/Oct01_12-39-59_bask-pg0309u15a/events.out.tfevents.1759318879.bask-pg0309u15a.1769939.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0fce42c53d94518d177027c274c37c2cda048ea86b389c783cfaab5bbca82f90
3
+ size 223363
lisa-ivl3-2b_aati_sr_bs5acu8e20/runs/Oct01_12-39-59_bask-pg0309u15a/events.out.tfevents.1759401820.bask-pg0309u15a.1769939.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:181b60d216f4848288f94bcee5fe5a3911b349dbdb683b7bff42595701bca3c8
3
+ size 1402
lisa-ivl3-2b_nr2_vlorati_sr/ckpt_model/config.json ADDED
@@ -0,0 +1,143 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "InternVL3Self"
4
+ ],
5
+ "auto_map": {
6
+ "AutoConfig": "configuration_internvl_chat.InternVLChatConfig",
7
+ "AutoModel": "modeling_internvl_chat.InternVLChatModel",
8
+ "AutoModelForCausalLM": "modeling_internvl_chat.InternVLChatModel"
9
+ },
10
+ "downsample_ratio": 0.5,
11
+ "dtype": "bfloat16",
12
+ "dynamic_image_size": true,
13
+ "eos_token_id": 151645,
14
+ "force_image_size": 448,
15
+ "hidden_size": 1536,
16
+ "image_fold": null,
17
+ "llm_config": {
18
+ "_attn_implementation_autoset": true,
19
+ "_name_or_path": "./pretrained/Qwen2.5-32B-Instruct",
20
+ "architectures": [
21
+ "Qwen2ForCausalLM"
22
+ ],
23
+ "attention_dropout": 0.0,
24
+ "bos_token_id": 151643,
25
+ "dtype": "bfloat16",
26
+ "eos_token_id": 151643,
27
+ "hidden_act": "silu",
28
+ "hidden_size": 1536,
29
+ "initializer_range": 0.02,
30
+ "intermediate_size": 8960,
31
+ "layer_types": [
32
+ "full_attention",
33
+ "full_attention",
34
+ "full_attention",
35
+ "full_attention",
36
+ "full_attention",
37
+ "full_attention",
38
+ "full_attention",
39
+ "full_attention",
40
+ "full_attention",
41
+ "full_attention",
42
+ "full_attention",
43
+ "full_attention",
44
+ "full_attention",
45
+ "full_attention",
46
+ "full_attention",
47
+ "full_attention",
48
+ "full_attention",
49
+ "full_attention",
50
+ "full_attention",
51
+ "full_attention",
52
+ "full_attention",
53
+ "full_attention",
54
+ "full_attention",
55
+ "full_attention",
56
+ "full_attention",
57
+ "full_attention",
58
+ "full_attention",
59
+ "full_attention"
60
+ ],
61
+ "max_position_embeddings": 32768,
62
+ "max_window_layers": 70,
63
+ "model_type": "qwen2",
64
+ "moe_config": null,
65
+ "num_attention_heads": 12,
66
+ "num_hidden_layers": 28,
67
+ "num_key_value_heads": 2,
68
+ "rms_norm_eps": 1e-06,
69
+ "rope_scaling": {
70
+ "factor": 2.0,
71
+ "rope_type": "dynamic",
72
+ "type": "dynamic"
73
+ },
74
+ "rope_theta": 1000000.0,
75
+ "sliding_window": null,
76
+ "use_bfloat16": true,
77
+ "use_cache": false,
78
+ "use_sliding_window": false,
79
+ "vocab_size": 151676
80
+ },
81
+ "max_dynamic_patch": 12,
82
+ "min_dynamic_patch": 1,
83
+ "model_type": "internvl_chat",
84
+ "output_attentions": false,
85
+ "pad2square": false,
86
+ "pad_token_id": 151643,
87
+ "ps_version": "v2",
88
+ "select_layer": -1,
89
+ "system_message": null,
90
+ "template": "internvl2_5",
91
+ "tie_word_embeddings": false,
92
+ "transformers_version": null,
93
+ "use_backbone_lora": 0,
94
+ "use_llm_lora": 0,
95
+ "use_thumbnail": true,
96
+ "vision_config": {
97
+ "_attn_implementation_autoset": true,
98
+ "_name_or_path": "OpenGVLab/InternViT-6B-448px-V1-5",
99
+ "architectures": [
100
+ "InternVisionModel"
101
+ ],
102
+ "attention_dropout": 0.0,
103
+ "auto_map": {
104
+ "AutoConfig": "configuration_intern_vit.InternVisionConfig",
105
+ "AutoModel": "modeling_intern_vit.InternVisionModel"
106
+ },
107
+ "capacity_factor": 1.2,
108
+ "drop_path_rate": 0.1,
109
+ "dropout": 0.0,
110
+ "dtype": "bfloat16",
111
+ "eval_capacity_factor": 1.4,
112
+ "hidden_act": "gelu",
113
+ "hidden_size": 1024,
114
+ "image_size": 448,
115
+ "initializer_factor": 0.1,
116
+ "initializer_range": 1e-10,
117
+ "intermediate_size": 4096,
118
+ "laux_allreduce": "all_nodes",
119
+ "layer_norm_eps": 1e-06,
120
+ "model_type": "intern_vit_6b",
121
+ "moe_coeff_ratio": 0.5,
122
+ "moe_intermediate_size": 768,
123
+ "moe_output_scale": 4.0,
124
+ "noisy_gate_policy": "RSample_before",
125
+ "norm_type": "layer_norm",
126
+ "num_attention_heads": 16,
127
+ "num_channels": 3,
128
+ "num_experts": 8,
129
+ "num_hidden_layers": 24,
130
+ "num_routed_experts": 4,
131
+ "num_shared_experts": 4,
132
+ "patch_size": 14,
133
+ "qk_normalization": false,
134
+ "qkv_bias": true,
135
+ "shared_expert_intermediate_size": 3072,
136
+ "use_bfloat16": true,
137
+ "use_flash_attn": true,
138
+ "use_moe": false,
139
+ "use_residual": true,
140
+ "use_rts": false,
141
+ "use_weighted_residual": false
142
+ }
143
+ }
lisa-ivl3-2b_nr2_vlorati_sr/ckpt_model/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:173587561539e9ca8afd26179c60ac88288d2251ffbcdfc852b311549c141bf8
3
+ size 4244119544
lisa-ivl3-2b_nr2_vlorati_sr/ckpt_model/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be8234d7498e224822badd158dcf7a4e5ff9f0b47390f78268aca5df600092ab
3
+ size 7352
lisa-ivl3-2b_nr2_vlorati_sr/evaluation_metrics.json ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "val_dataset": "ReasonSeg|val",
4
+ "epoch": 1.0,
5
+ "eval_giou": 0.49992606043815613,
6
+ "eval_ciou": 0.6110827922821045
7
+ },
8
+ {
9
+ "val_dataset": "ReasonSeg|val",
10
+ "epoch": 2.0,
11
+ "eval_giou": 0.5632675290107727,
12
+ "eval_ciou": 0.623365581035614
13
+ },
14
+ {
15
+ "val_dataset": "ReasonSeg|val",
16
+ "epoch": 3.0,
17
+ "eval_giou": 0.5778806805610657,
18
+ "eval_ciou": 0.6206309199333191
19
+ },
20
+ {
21
+ "val_dataset": "ReasonSeg|val",
22
+ "epoch": 4.0,
23
+ "eval_giou": 0.5851303935050964,
24
+ "eval_ciou": 0.6020804643630981
25
+ },
26
+ {
27
+ "val_dataset": "ReasonSeg|val",
28
+ "epoch": 5.0,
29
+ "eval_giou": 0.5820455551147461,
30
+ "eval_ciou": 0.6651975512504578
31
+ },
32
+ {
33
+ "val_dataset": "ReasonSeg|val",
34
+ "epoch": 6.0,
35
+ "eval_giou": 0.6028497219085693,
36
+ "eval_ciou": 0.6962510943412781
37
+ },
38
+ {
39
+ "val_dataset": "ReasonSeg|val",
40
+ "epoch": 7.0,
41
+ "eval_giou": 0.5977049469947815,
42
+ "eval_ciou": 0.6804401874542236
43
+ },
44
+ {
45
+ "val_dataset": "ReasonSeg|val",
46
+ "epoch": 8.0,
47
+ "eval_giou": 0.6007415652275085,
48
+ "eval_ciou": 0.6796437501907349
49
+ },
50
+ {
51
+ "val_dataset": "ReasonSeg|val",
52
+ "epoch": 9.0,
53
+ "eval_giou": 0.611798882484436,
54
+ "eval_ciou": 0.6809463500976562
55
+ },
56
+ {
57
+ "val_dataset": "ReasonSeg|val",
58
+ "epoch": 10.0,
59
+ "eval_giou": 0.6134523153305054,
60
+ "eval_ciou": 0.6816759705543518
61
+ },
62
+ {
63
+ "val_dataset": "ReasonSeg|test",
64
+ "epoch": 10.0,
65
+ "eval_giou": 0.6059213876724243,
66
+ "eval_ciou": 0.6428766846656799
67
+ },
68
+ {
69
+ "val_dataset": "refcoco|unc|val",
70
+ "epoch": 10.0,
71
+ "eval_giou": 0.7799202799797058,
72
+ "eval_ciou": 0.786268413066864
73
+ },
74
+ {
75
+ "val_dataset": "refcoco|unc|testA",
76
+ "epoch": 10.0,
77
+ "eval_giou": 0.8031054139137268,
78
+ "eval_ciou": 0.8086256980895996
79
+ },
80
+ {
81
+ "val_dataset": "refcoco|unc|testB",
82
+ "epoch": 10.0,
83
+ "eval_giou": 0.7538403272628784,
84
+ "eval_ciou": 0.7585700750350952
85
+ },
86
+ {
87
+ "val_dataset": "refcoco+|unc|val",
88
+ "epoch": 10.0,
89
+ "eval_giou": 0.7317773699760437,
90
+ "eval_ciou": 0.7287389039993286
91
+ },
92
+ {
93
+ "val_dataset": "refcoco+|unc|testA",
94
+ "epoch": 10.0,
95
+ "eval_giou": 0.7756525278091431,
96
+ "eval_ciou": 0.7790922522544861
97
+ },
98
+ {
99
+ "val_dataset": "refcoco+|unc|testB",
100
+ "epoch": 10.0,
101
+ "eval_giou": 0.6839732527732849,
102
+ "eval_ciou": 0.6755383610725403
103
+ },
104
+ {
105
+ "val_dataset": "refcocog|umd|test",
106
+ "epoch": 10.0,
107
+ "eval_giou": 0.7441026568412781,
108
+ "eval_ciou": 0.7565898895263672
109
+ },
110
+ {
111
+ "val_dataset": "refcocog|umd|val",
112
+ "epoch": 10.0,
113
+ "eval_giou": 0.7429221272468567,
114
+ "eval_ciou": 0.7514289021492004
115
+ }
116
+ ]
lisa-ivl3-2b_nr2_vlorati_sr/events.out.tfevents.1759275794.bask-pg0309u06a.3124946.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a626d6013165f7d36b0d694d7cb70efc26aa7293b04f2b4033c9f6aaa49e6c89
3
+ size 212352
lisa-ivl3-2b_nr2_vlorati_sr/runs/Oct01_00-43-11_bask-pg0309u06a/events.out.tfevents.1759275884.bask-pg0309u06a.3124946.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fca84167c42051785d90a2d5cfe045d729b673057867500e41f40bee333d9175
3
+ size 116399
lisa-ivl3-2b_nr2_vlorati_sr/runs/Oct01_00-43-11_bask-pg0309u06a/events.out.tfevents.1759322723.bask-pg0309u06a.3124946.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:28be3c60b606cd71d8db4d463b62c2df3c4bd394ae7c4e8ac23f0f56f371b33a
3
+ size 1402
lisa-ivl3-2b_nr3_122_2_vlorati_sr/ckpt_model/config.json ADDED
@@ -0,0 +1,143 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "InternVL3Self"
4
+ ],
5
+ "auto_map": {
6
+ "AutoConfig": "configuration_internvl_chat.InternVLChatConfig",
7
+ "AutoModel": "modeling_internvl_chat.InternVLChatModel",
8
+ "AutoModelForCausalLM": "modeling_internvl_chat.InternVLChatModel"
9
+ },
10
+ "downsample_ratio": 0.5,
11
+ "dtype": "bfloat16",
12
+ "dynamic_image_size": true,
13
+ "eos_token_id": 151645,
14
+ "force_image_size": 448,
15
+ "hidden_size": 1536,
16
+ "image_fold": null,
17
+ "llm_config": {
18
+ "_attn_implementation_autoset": true,
19
+ "_name_or_path": "./pretrained/Qwen2.5-32B-Instruct",
20
+ "architectures": [
21
+ "Qwen2ForCausalLM"
22
+ ],
23
+ "attention_dropout": 0.0,
24
+ "bos_token_id": 151643,
25
+ "dtype": "bfloat16",
26
+ "eos_token_id": 151643,
27
+ "hidden_act": "silu",
28
+ "hidden_size": 1536,
29
+ "initializer_range": 0.02,
30
+ "intermediate_size": 8960,
31
+ "layer_types": [
32
+ "full_attention",
33
+ "full_attention",
34
+ "full_attention",
35
+ "full_attention",
36
+ "full_attention",
37
+ "full_attention",
38
+ "full_attention",
39
+ "full_attention",
40
+ "full_attention",
41
+ "full_attention",
42
+ "full_attention",
43
+ "full_attention",
44
+ "full_attention",
45
+ "full_attention",
46
+ "full_attention",
47
+ "full_attention",
48
+ "full_attention",
49
+ "full_attention",
50
+ "full_attention",
51
+ "full_attention",
52
+ "full_attention",
53
+ "full_attention",
54
+ "full_attention",
55
+ "full_attention",
56
+ "full_attention",
57
+ "full_attention",
58
+ "full_attention",
59
+ "full_attention"
60
+ ],
61
+ "max_position_embeddings": 32768,
62
+ "max_window_layers": 70,
63
+ "model_type": "qwen2",
64
+ "moe_config": null,
65
+ "num_attention_heads": 12,
66
+ "num_hidden_layers": 28,
67
+ "num_key_value_heads": 2,
68
+ "rms_norm_eps": 1e-06,
69
+ "rope_scaling": {
70
+ "factor": 2.0,
71
+ "rope_type": "dynamic",
72
+ "type": "dynamic"
73
+ },
74
+ "rope_theta": 1000000.0,
75
+ "sliding_window": null,
76
+ "use_bfloat16": true,
77
+ "use_cache": false,
78
+ "use_sliding_window": false,
79
+ "vocab_size": 151676
80
+ },
81
+ "max_dynamic_patch": 12,
82
+ "min_dynamic_patch": 1,
83
+ "model_type": "internvl_chat",
84
+ "output_attentions": false,
85
+ "pad2square": false,
86
+ "pad_token_id": 151643,
87
+ "ps_version": "v2",
88
+ "select_layer": -1,
89
+ "system_message": null,
90
+ "template": "internvl2_5",
91
+ "tie_word_embeddings": false,
92
+ "transformers_version": null,
93
+ "use_backbone_lora": 0,
94
+ "use_llm_lora": 0,
95
+ "use_thumbnail": true,
96
+ "vision_config": {
97
+ "_attn_implementation_autoset": true,
98
+ "_name_or_path": "OpenGVLab/InternViT-6B-448px-V1-5",
99
+ "architectures": [
100
+ "InternVisionModel"
101
+ ],
102
+ "attention_dropout": 0.0,
103
+ "auto_map": {
104
+ "AutoConfig": "configuration_intern_vit.InternVisionConfig",
105
+ "AutoModel": "modeling_intern_vit.InternVisionModel"
106
+ },
107
+ "capacity_factor": 1.2,
108
+ "drop_path_rate": 0.1,
109
+ "dropout": 0.0,
110
+ "dtype": "bfloat16",
111
+ "eval_capacity_factor": 1.4,
112
+ "hidden_act": "gelu",
113
+ "hidden_size": 1024,
114
+ "image_size": 448,
115
+ "initializer_factor": 0.1,
116
+ "initializer_range": 1e-10,
117
+ "intermediate_size": 4096,
118
+ "laux_allreduce": "all_nodes",
119
+ "layer_norm_eps": 1e-06,
120
+ "model_type": "intern_vit_6b",
121
+ "moe_coeff_ratio": 0.5,
122
+ "moe_intermediate_size": 768,
123
+ "moe_output_scale": 4.0,
124
+ "noisy_gate_policy": "RSample_before",
125
+ "norm_type": "layer_norm",
126
+ "num_attention_heads": 16,
127
+ "num_channels": 3,
128
+ "num_experts": 8,
129
+ "num_hidden_layers": 24,
130
+ "num_routed_experts": 4,
131
+ "num_shared_experts": 4,
132
+ "patch_size": 14,
133
+ "qk_normalization": false,
134
+ "qkv_bias": true,
135
+ "shared_expert_intermediate_size": 3072,
136
+ "use_bfloat16": true,
137
+ "use_flash_attn": true,
138
+ "use_moe": false,
139
+ "use_residual": true,
140
+ "use_rts": false,
141
+ "use_weighted_residual": false
142
+ }
143
+ }
lisa-ivl3-2b_nr3_122_2_vlorati_sr/ckpt_model/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9f4a808fc1338f523d39870fc6f62d3d0c3f65a38db9f6aba9ff19db3dbb7f81
3
+ size 4244119544
lisa-ivl3-2b_nr3_122_2_vlorati_sr/ckpt_model/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:379c086e43080e14b752c22cfffeb2e07a07c0bd6ea16efb629f93d6cf26012c
3
+ size 7352
lisa-ivl3-2b_nr3_122_2_vlorati_sr/evaluation_metrics.json ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "val_dataset": "ReasonSeg|val",
4
+ "epoch": 1.0,
5
+ "eval_giou": 0.529840350151062,
6
+ "eval_ciou": 0.61083984375
7
+ },
8
+ {
9
+ "val_dataset": "ReasonSeg|val",
10
+ "epoch": 2.0,
11
+ "eval_giou": 0.5495224595069885,
12
+ "eval_ciou": 0.6280785202980042
13
+ },
14
+ {
15
+ "val_dataset": "ReasonSeg|val",
16
+ "epoch": 3.0,
17
+ "eval_giou": 0.5777504444122314,
18
+ "eval_ciou": 0.6374984979629517
19
+ },
20
+ {
21
+ "val_dataset": "ReasonSeg|val",
22
+ "epoch": 4.0,
23
+ "eval_giou": 0.577897846698761,
24
+ "eval_ciou": 0.6562594771385193
25
+ },
26
+ {
27
+ "val_dataset": "ReasonSeg|val",
28
+ "epoch": 5.0,
29
+ "eval_giou": 0.5785743594169617,
30
+ "eval_ciou": 0.6527135968208313
31
+ },
32
+ {
33
+ "val_dataset": "ReasonSeg|val",
34
+ "epoch": 6.0,
35
+ "eval_giou": 0.5933905243873596,
36
+ "eval_ciou": 0.6383258700370789
37
+ },
38
+ {
39
+ "val_dataset": "ReasonSeg|val",
40
+ "epoch": 7.0,
41
+ "eval_giou": 0.6087335348129272,
42
+ "eval_ciou": 0.6717571020126343
43
+ },
44
+ {
45
+ "val_dataset": "ReasonSeg|val",
46
+ "epoch": 8.0,
47
+ "eval_giou": 0.6045055985450745,
48
+ "eval_ciou": 0.6598408818244934
49
+ },
50
+ {
51
+ "val_dataset": "ReasonSeg|val",
52
+ "epoch": 9.0,
53
+ "eval_giou": 0.6039425730705261,
54
+ "eval_ciou": 0.6512514352798462
55
+ },
56
+ {
57
+ "val_dataset": "ReasonSeg|val",
58
+ "epoch": 10.0,
59
+ "eval_giou": 0.6099196076393127,
60
+ "eval_ciou": 0.6599507927894592
61
+ },
62
+ {
63
+ "val_dataset": "ReasonSeg|test",
64
+ "epoch": 10.0,
65
+ "eval_giou": 0.5983391404151917,
66
+ "eval_ciou": 0.6378564238548279
67
+ },
68
+ {
69
+ "val_dataset": "refcoco|unc|val",
70
+ "epoch": 10.0,
71
+ "eval_giou": 0.7846658825874329,
72
+ "eval_ciou": 0.7908703684806824
73
+ },
74
+ {
75
+ "val_dataset": "refcoco|unc|testA",
76
+ "epoch": 10.0,
77
+ "eval_giou": 0.8061053156852722,
78
+ "eval_ciou": 0.8121806979179382
79
+ },
80
+ {
81
+ "val_dataset": "refcoco|unc|testB",
82
+ "epoch": 10.0,
83
+ "eval_giou": 0.7557012438774109,
84
+ "eval_ciou": 0.7612731456756592
85
+ },
86
+ {
87
+ "val_dataset": "refcoco+|unc|val",
88
+ "epoch": 10.0,
89
+ "eval_giou": 0.7339252233505249,
90
+ "eval_ciou": 0.7308076620101929
91
+ },
92
+ {
93
+ "val_dataset": "refcoco+|unc|testA",
94
+ "epoch": 10.0,
95
+ "eval_giou": 0.7778381109237671,
96
+ "eval_ciou": 0.7786577343940735
97
+ },
98
+ {
99
+ "val_dataset": "refcoco+|unc|testB",
100
+ "epoch": 10.0,
101
+ "eval_giou": 0.6824975609779358,
102
+ "eval_ciou": 0.6733501553535461
103
+ },
104
+ {
105
+ "val_dataset": "refcocog|umd|test",
106
+ "epoch": 10.0,
107
+ "eval_giou": 0.7487913370132446,
108
+ "eval_ciou": 0.7593491077423096
109
+ },
110
+ {
111
+ "val_dataset": "refcocog|umd|val",
112
+ "epoch": 10.0,
113
+ "eval_giou": 0.741722047328949,
114
+ "eval_ciou": 0.7474746108055115
115
+ }
116
+ ]
lisa-ivl3-2b_nr3_122_2_vlorati_sr/events.out.tfevents.1759337142.bask-pg0308u25a.3571287.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4168921ea1ec0605b7c338bf6d503272031b323ecd4d82df1611cc32a60dffff
3
+ size 88
lisa-ivl3-2b_nr3_122_2_vlorati_sr/events.out.tfevents.1759337476.bask-pg0308u25a.3578336.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eb4061204dccb97c29c5016a06fb6b8106538e983d4706a9d8cfaf792ab96b22
3
+ size 88
lisa-ivl3-2b_nr3_122_2_vlorati_sr/events.out.tfevents.1759337801.bask-pg0308u25a.3585526.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c17b438e249a33aa0523b19d071b841150d521f4866f6a455593a32537716925
3
+ size 212352
lisa-ivl3-2b_nr3_122_2_vlorati_sr/runs/Oct01_17-45-37_bask-pg0308u25a/events.out.tfevents.1759337227.bask-pg0308u25a.3571287.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:04d1081dc5085eec9aa9a354881fc3242aef9ef28c256ec99da1afaf8dbc23db
3
+ size 9142
lisa-ivl3-2b_nr3_122_2_vlorati_sr/runs/Oct01_17-51-12_bask-pg0308u25a/events.out.tfevents.1759337560.bask-pg0308u25a.3578336.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b8b67e67b8dea507d2af6b314c185d004a4f6812d77e6466ac9798e3dad32b3
3
+ size 9142
lisa-ivl3-2b_nr3_122_2_vlorati_sr/runs/Oct01_17-56-37_bask-pg0308u25a/events.out.tfevents.1759337887.bask-pg0308u25a.3585526.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:97e63030a506caa5bfdc317829ff84b1ddab6909836eaafeac7140ef1a7d1553
3
+ size 116408
lisa-ivl3-2b_nr3_122_2_vlorati_sr/runs/Oct01_17-56-37_bask-pg0308u25a/events.out.tfevents.1759363414.bask-pg0308u25a.3585526.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:39734374fddcd4e3ccd2b2229fe29d71a729ae3759c1b098cb295dc411c47503
3
+ size 1402
lisa-ivl3-2b_nr3_122_vlorati_sr/ckpt_model/config.json ADDED
@@ -0,0 +1,143 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "InternVL3Self"
4
+ ],
5
+ "auto_map": {
6
+ "AutoConfig": "configuration_internvl_chat.InternVLChatConfig",
7
+ "AutoModel": "modeling_internvl_chat.InternVLChatModel",
8
+ "AutoModelForCausalLM": "modeling_internvl_chat.InternVLChatModel"
9
+ },
10
+ "downsample_ratio": 0.5,
11
+ "dtype": "bfloat16",
12
+ "dynamic_image_size": true,
13
+ "eos_token_id": 151645,
14
+ "force_image_size": 448,
15
+ "hidden_size": 1536,
16
+ "image_fold": null,
17
+ "llm_config": {
18
+ "_attn_implementation_autoset": true,
19
+ "_name_or_path": "./pretrained/Qwen2.5-32B-Instruct",
20
+ "architectures": [
21
+ "Qwen2ForCausalLM"
22
+ ],
23
+ "attention_dropout": 0.0,
24
+ "bos_token_id": 151643,
25
+ "dtype": "bfloat16",
26
+ "eos_token_id": 151643,
27
+ "hidden_act": "silu",
28
+ "hidden_size": 1536,
29
+ "initializer_range": 0.02,
30
+ "intermediate_size": 8960,
31
+ "layer_types": [
32
+ "full_attention",
33
+ "full_attention",
34
+ "full_attention",
35
+ "full_attention",
36
+ "full_attention",
37
+ "full_attention",
38
+ "full_attention",
39
+ "full_attention",
40
+ "full_attention",
41
+ "full_attention",
42
+ "full_attention",
43
+ "full_attention",
44
+ "full_attention",
45
+ "full_attention",
46
+ "full_attention",
47
+ "full_attention",
48
+ "full_attention",
49
+ "full_attention",
50
+ "full_attention",
51
+ "full_attention",
52
+ "full_attention",
53
+ "full_attention",
54
+ "full_attention",
55
+ "full_attention",
56
+ "full_attention",
57
+ "full_attention",
58
+ "full_attention",
59
+ "full_attention"
60
+ ],
61
+ "max_position_embeddings": 32768,
62
+ "max_window_layers": 70,
63
+ "model_type": "qwen2",
64
+ "moe_config": null,
65
+ "num_attention_heads": 12,
66
+ "num_hidden_layers": 28,
67
+ "num_key_value_heads": 2,
68
+ "rms_norm_eps": 1e-06,
69
+ "rope_scaling": {
70
+ "factor": 2.0,
71
+ "rope_type": "dynamic",
72
+ "type": "dynamic"
73
+ },
74
+ "rope_theta": 1000000.0,
75
+ "sliding_window": null,
76
+ "use_bfloat16": true,
77
+ "use_cache": false,
78
+ "use_sliding_window": false,
79
+ "vocab_size": 151676
80
+ },
81
+ "max_dynamic_patch": 12,
82
+ "min_dynamic_patch": 1,
83
+ "model_type": "internvl_chat",
84
+ "output_attentions": false,
85
+ "pad2square": false,
86
+ "pad_token_id": 151643,
87
+ "ps_version": "v2",
88
+ "select_layer": -1,
89
+ "system_message": null,
90
+ "template": "internvl2_5",
91
+ "tie_word_embeddings": false,
92
+ "transformers_version": null,
93
+ "use_backbone_lora": 0,
94
+ "use_llm_lora": 0,
95
+ "use_thumbnail": true,
96
+ "vision_config": {
97
+ "_attn_implementation_autoset": true,
98
+ "_name_or_path": "OpenGVLab/InternViT-6B-448px-V1-5",
99
+ "architectures": [
100
+ "InternVisionModel"
101
+ ],
102
+ "attention_dropout": 0.0,
103
+ "auto_map": {
104
+ "AutoConfig": "configuration_intern_vit.InternVisionConfig",
105
+ "AutoModel": "modeling_intern_vit.InternVisionModel"
106
+ },
107
+ "capacity_factor": 1.2,
108
+ "drop_path_rate": 0.1,
109
+ "dropout": 0.0,
110
+ "dtype": "bfloat16",
111
+ "eval_capacity_factor": 1.4,
112
+ "hidden_act": "gelu",
113
+ "hidden_size": 1024,
114
+ "image_size": 448,
115
+ "initializer_factor": 0.1,
116
+ "initializer_range": 1e-10,
117
+ "intermediate_size": 4096,
118
+ "laux_allreduce": "all_nodes",
119
+ "layer_norm_eps": 1e-06,
120
+ "model_type": "intern_vit_6b",
121
+ "moe_coeff_ratio": 0.5,
122
+ "moe_intermediate_size": 768,
123
+ "moe_output_scale": 4.0,
124
+ "noisy_gate_policy": "RSample_before",
125
+ "norm_type": "layer_norm",
126
+ "num_attention_heads": 16,
127
+ "num_channels": 3,
128
+ "num_experts": 8,
129
+ "num_hidden_layers": 24,
130
+ "num_routed_experts": 4,
131
+ "num_shared_experts": 4,
132
+ "patch_size": 14,
133
+ "qk_normalization": false,
134
+ "qkv_bias": true,
135
+ "shared_expert_intermediate_size": 3072,
136
+ "use_bfloat16": true,
137
+ "use_flash_attn": true,
138
+ "use_moe": false,
139
+ "use_residual": true,
140
+ "use_rts": false,
141
+ "use_weighted_residual": false
142
+ }
143
+ }
lisa-ivl3-2b_nr3_122_vlorati_sr/ckpt_model/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e765f787a4b4b90a44c541c060bb60a6be32e0be5cf6019395536ef4edefc8fb
3
+ size 4234675816
lisa-ivl3-2b_nr3_122_vlorati_sr/ckpt_model/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f76ac58c2275447d9bcb0a727e23e4a780f4b7215404a84c15dbfdfb1231188
3
+ size 7352
lisa-ivl3-2b_nr3_122_vlorati_sr/evaluation_metrics.json ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "val_dataset": "ReasonSeg|val",
4
+ "epoch": 1.0,
5
+ "eval_giou": 0.5373343229293823,
6
+ "eval_ciou": 0.6180018782615662
7
+ },
8
+ {
9
+ "val_dataset": "ReasonSeg|val",
10
+ "epoch": 2.0,
11
+ "eval_giou": 0.5649374127388,
12
+ "eval_ciou": 0.636763334274292
13
+ },
14
+ {
15
+ "val_dataset": "ReasonSeg|val",
16
+ "epoch": 3.0,
17
+ "eval_giou": 0.5869829654693604,
18
+ "eval_ciou": 0.7015414834022522
19
+ },
20
+ {
21
+ "val_dataset": "ReasonSeg|val",
22
+ "epoch": 4.0,
23
+ "eval_giou": 0.5917444825172424,
24
+ "eval_ciou": 0.7137655019760132
25
+ },
26
+ {
27
+ "val_dataset": "ReasonSeg|val",
28
+ "epoch": 5.0,
29
+ "eval_giou": 0.5996885895729065,
30
+ "eval_ciou": 0.7088227868080139
31
+ },
32
+ {
33
+ "val_dataset": "ReasonSeg|val",
34
+ "epoch": 6.0,
35
+ "eval_giou": 0.6110551953315735,
36
+ "eval_ciou": 0.6965492963790894
37
+ },
38
+ {
39
+ "val_dataset": "ReasonSeg|val",
40
+ "epoch": 7.0,
41
+ "eval_giou": 0.6078798174858093,
42
+ "eval_ciou": 0.718289852142334
43
+ },
44
+ {
45
+ "val_dataset": "ReasonSeg|val",
46
+ "epoch": 8.0,
47
+ "eval_giou": 0.6149584054946899,
48
+ "eval_ciou": 0.6968558430671692
49
+ },
50
+ {
51
+ "val_dataset": "ReasonSeg|val",
52
+ "epoch": 9.0,
53
+ "eval_giou": 0.6170741319656372,
54
+ "eval_ciou": 0.7212521433830261
55
+ },
56
+ {
57
+ "val_dataset": "ReasonSeg|val",
58
+ "epoch": 10.0,
59
+ "eval_giou": 0.6232219934463501,
60
+ "eval_ciou": 0.7210202217102051
61
+ },
62
+ {
63
+ "val_dataset": "ReasonSeg|test",
64
+ "epoch": 10.0,
65
+ "eval_giou": 0.596360981464386,
66
+ "eval_ciou": 0.6341654062271118
67
+ },
68
+ {
69
+ "val_dataset": "refcoco|unc|val",
70
+ "epoch": 10.0,
71
+ "eval_giou": 0.7841194868087769,
72
+ "eval_ciou": 0.7900864481925964
73
+ },
74
+ {
75
+ "val_dataset": "refcoco|unc|testA",
76
+ "epoch": 10.0,
77
+ "eval_giou": 0.8032773733139038,
78
+ "eval_ciou": 0.8108689188957214
79
+ },
80
+ {
81
+ "val_dataset": "refcoco|unc|testB",
82
+ "epoch": 10.0,
83
+ "eval_giou": 0.7510504722595215,
84
+ "eval_ciou": 0.7533969879150391
85
+ },
86
+ {
87
+ "val_dataset": "refcoco+|unc|val",
88
+ "epoch": 10.0,
89
+ "eval_giou": 0.7322676181793213,
90
+ "eval_ciou": 0.727592408657074
91
+ },
92
+ {
93
+ "val_dataset": "refcoco+|unc|testA",
94
+ "epoch": 10.0,
95
+ "eval_giou": 0.7752026915550232,
96
+ "eval_ciou": 0.7760695219039917
97
+ },
98
+ {
99
+ "val_dataset": "refcoco+|unc|testB",
100
+ "epoch": 10.0,
101
+ "eval_giou": 0.6822892427444458,
102
+ "eval_ciou": 0.67359459400177
103
+ },
104
+ {
105
+ "val_dataset": "refcocog|umd|test",
106
+ "epoch": 10.0,
107
+ "eval_giou": 0.7448171973228455,
108
+ "eval_ciou": 0.7525338530540466
109
+ },
110
+ {
111
+ "val_dataset": "refcocog|umd|val",
112
+ "epoch": 10.0,
113
+ "eval_giou": 0.7451841235160828,
114
+ "eval_ciou": 0.7531925439834595
115
+ }
116
+ ]
lisa-ivl3-2b_nr3_122_vlorati_sr/events.out.tfevents.1759309189.bask-pg0308u29a.2492715.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:742dec7da1d819b4b47b6b557d7c0265011f67a5b2fb8837f2e59ea2e2f5c5b3
3
+ size 88
lisa-ivl3-2b_nr3_122_vlorati_sr/events.out.tfevents.1759309262.bask-pg0308u29a.2496177.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bad55cd273aecdd44863bd2d637e019c83e3779f07c25f3f53358f367865772f
3
+ size 88