patrickamadeus committed on
Commit
ad86749
·
verified ·
1 Parent(s): 57055f0

Upload legacy checkpoint from step_500

Browse files
Files changed (3) hide show
  1. config.json +6 -5
  2. model.safetensors +2 -2
  3. training_state.pt +2 -2
config.json CHANGED
@@ -105,24 +105,25 @@
105
  "vlm_load_backbone_weights": true,
106
  "left_tower_prefill_mode": "full",
107
  "use_kv_bridge": false,
108
- "kv_bridge_mode": "residual",
109
  "kv_bridge_affine_stack_depth": 1,
110
  "kv_bridge_adapter_expansion_factor": 1.0,
111
  "kv_bridge_use_gate": false
112
  },
113
  "train_cfg": {
114
- "lr_mp": 5e-05,
115
  "lr_vision_backbone": 0.0,
116
- "lr_language_backbone": 5e-05,
117
  "lr_right_tower": 0.0,
118
  "lr_kv_bridge": 0.0,
119
- "batch_size": 16,
120
- "gradient_accumulation_steps": 8,
121
  "max_grad_norm": 1.0,
122
  "max_training_steps": 20000,
123
  "warmup_ratio": 0.03,
124
  "stats_log_interval": 100,
125
  "compile": false,
 
126
  "eval_in_epochs": false,
127
  "eval_interval": 500,
128
  "use_packing": false,
 
105
  "vlm_load_backbone_weights": true,
106
  "left_tower_prefill_mode": "full",
107
  "use_kv_bridge": false,
108
+ "kv_bridge_mode": "identity",
109
  "kv_bridge_affine_stack_depth": 1,
110
  "kv_bridge_adapter_expansion_factor": 1.0,
111
  "kv_bridge_use_gate": false
112
  },
113
  "train_cfg": {
114
+ "lr_mp": 0.0005,
115
  "lr_vision_backbone": 0.0,
116
+ "lr_language_backbone": 0.0005,
117
  "lr_right_tower": 0.0,
118
  "lr_kv_bridge": 0.0,
119
+ "batch_size": 32,
120
+ "gradient_accumulation_steps": 4,
121
  "max_grad_norm": 1.0,
122
  "max_training_steps": 20000,
123
  "warmup_ratio": 0.03,
124
  "stats_log_interval": 100,
125
  "compile": false,
126
+ "precision": "bf16",
127
  "eval_in_epochs": false,
128
  "eval_interval": 500,
129
  "use_packing": false,
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9f12538d76861c7cf7e0af3adc7b512ce64d8a74d9905d15a3c7207c656d6b58
3
- size 1450549888
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:48293f1361ed7ff852009182d1a92785185d3c0be16d6d660929b9bb46d602a4
3
+ size 725315984
training_state.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ef9667050fda4c6f5f9e6c609b8e6bc25c91f2c77b67a76a0bd0576a93d33435
3
- size 1106724543
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c4bf70bf25ecc2c705b99c4e4762056dc0a2c76d090df56db7aa654a579c7f8
3
+ size 553476607