shuowangmark committed on
Commit
2889130
·
verified ·
1 Parent(s): c9ec924

Update config.json

Browse files
Files changed (1) hide show
  1. config.json +0 -12
config.json CHANGED
@@ -1,7 +1,6 @@
1
  {
2
  "Ubit": 100,
3
  "_attn_implementation_autoset": true,
4
- "_name_or_path": "/horizon-bucket/robot_lab/users/shuo03.wang/nips2025/sft_2B_model/step3_aux_qwen/tmp-checkpoint-25000",
5
  "architectures": [
6
  "LlavaLlamaModel"
7
  ],
@@ -26,13 +25,9 @@
26
  "group_size": -1,
27
  "hidden_size": 1536,
28
  "image_aspect_ratio": "dynamic",
29
- "image_encoder": {
30
- "_target_": "robo_orchard_lab.models.monodream.multimodal_encoder.BasicImageEncoder"
31
- },
32
  "interpolate_mode": "linear",
33
  "llm_cfg": {
34
  "_attn_implementation_autoset": false,
35
- "_name_or_path": "/horizon-bucket/robot_lab/users/shuo03.wang/nips2025/sft_2B_model/step3_aux_qwen/tmp-checkpoint-25000/llm",
36
  "add_cross_attention": false,
37
  "architectures": [
38
  "Qwen2ForCausalLM"
@@ -125,7 +120,6 @@
125
  "mm_projector": "mlp_downsample_3x3_fix",
126
  "mm_projector_cfg": {
127
  "_attn_implementation_autoset": false,
128
- "_name_or_path": "/horizon-bucket/robot_lab/users/shuo03.wang/nips2025/sft_2B_model/step3_aux_qwen/tmp-checkpoint-25000/mm_projector",
129
  "add_cross_attention": false,
130
  "architectures": [
131
  "MultimodalProjector"
@@ -196,7 +190,6 @@
196
  "mm_vision_select_feature": "cls_patch",
197
  "mm_vision_select_layer": -2,
198
  "model_dtype": "torch.bfloat16",
199
- "model_name_or_path": "/bucket/input/robot_lab/users/shuo03.wang/NVILA-Lite-2B",
200
  "model_type": "llava_llama",
201
  "num_time_tokens": 0,
202
  "num_video_frames": 8,
@@ -213,7 +206,6 @@
213
  "refine_mlp_blocksize": false,
214
  "refine_residual_fp": false,
215
  "refine_row_blocksize": 4,
216
- "resume_path": "/bucket/input/robot_lab/users/shuo03.wang/NVILA-Lite-2B",
217
  "row_blocksize": -1,
218
  "row_blocksize_optimizer": 1,
219
  "s2": false,
@@ -232,14 +224,10 @@
232
  "tune_vision_tower": true,
233
  "use_quantize_optimizer": false,
234
  "version": "auto",
235
- "video_encoder": {
236
- "_target_": "robo_orchard_lab.models.monodream.multimodal_encoder.BasicVideoEncoder"
237
- },
238
  "vision_resolution": -1,
239
  "vision_tower": "Efficient-Large-Model/paligemma-siglip-so400m-patch14-448",
240
  "vision_tower_cfg": {
241
  "_attn_implementation_autoset": false,
242
- "_name_or_path": "/horizon-bucket/robot_lab/users/shuo03.wang/nips2025/sft_2B_model/step3_aux_qwen/tmp-checkpoint-25000/vision_tower",
243
  "add_cross_attention": false,
244
  "architectures": [
245
  "SiglipVisionModel"
 
1
  {
2
  "Ubit": 100,
3
  "_attn_implementation_autoset": true,
 
4
  "architectures": [
5
  "LlavaLlamaModel"
6
  ],
 
25
  "group_size": -1,
26
  "hidden_size": 1536,
27
  "image_aspect_ratio": "dynamic",
 
 
 
28
  "interpolate_mode": "linear",
29
  "llm_cfg": {
30
  "_attn_implementation_autoset": false,
 
31
  "add_cross_attention": false,
32
  "architectures": [
33
  "Qwen2ForCausalLM"
 
120
  "mm_projector": "mlp_downsample_3x3_fix",
121
  "mm_projector_cfg": {
122
  "_attn_implementation_autoset": false,
 
123
  "add_cross_attention": false,
124
  "architectures": [
125
  "MultimodalProjector"
 
190
  "mm_vision_select_feature": "cls_patch",
191
  "mm_vision_select_layer": -2,
192
  "model_dtype": "torch.bfloat16",
 
193
  "model_type": "llava_llama",
194
  "num_time_tokens": 0,
195
  "num_video_frames": 8,
 
206
  "refine_mlp_blocksize": false,
207
  "refine_residual_fp": false,
208
  "refine_row_blocksize": 4,
 
209
  "row_blocksize": -1,
210
  "row_blocksize_optimizer": 1,
211
  "s2": false,
 
224
  "tune_vision_tower": true,
225
  "use_quantize_optimizer": false,
226
  "version": "auto",
 
 
 
227
  "vision_resolution": -1,
228
  "vision_tower": "Efficient-Large-Model/paligemma-siglip-so400m-patch14-448",
229
  "vision_tower_cfg": {
230
  "_attn_implementation_autoset": false,
 
231
  "add_cross_attention": false,
232
  "architectures": [
233
  "SiglipVisionModel"