Upload legacy checkpoint from step_500
Browse files
- config.json +6 -5
- model.safetensors +2 -2
- training_state.pt +2 -2
config.json
CHANGED
|
@@ -105,24 +105,25 @@
|
|
| 105 |
"vlm_load_backbone_weights": true,
|
| 106 |
"left_tower_prefill_mode": "full",
|
| 107 |
"use_kv_bridge": false,
|
| 108 |
-
"kv_bridge_mode": "
|
| 109 |
"kv_bridge_affine_stack_depth": 1,
|
| 110 |
"kv_bridge_adapter_expansion_factor": 1.0,
|
| 111 |
"kv_bridge_use_gate": false
|
| 112 |
},
|
| 113 |
"train_cfg": {
|
| 114 |
-
"lr_mp":
|
| 115 |
"lr_vision_backbone": 0.0,
|
| 116 |
-
"lr_language_backbone":
|
| 117 |
"lr_right_tower": 0.0,
|
| 118 |
"lr_kv_bridge": 0.0,
|
| 119 |
-
"batch_size":
|
| 120 |
-
"gradient_accumulation_steps":
|
| 121 |
"max_grad_norm": 1.0,
|
| 122 |
"max_training_steps": 20000,
|
| 123 |
"warmup_ratio": 0.03,
|
| 124 |
"stats_log_interval": 100,
|
| 125 |
"compile": false,
|
|
|
|
| 126 |
"eval_in_epochs": false,
|
| 127 |
"eval_interval": 500,
|
| 128 |
"use_packing": false,
|
|
|
|
| 105 |
"vlm_load_backbone_weights": true,
|
| 106 |
"left_tower_prefill_mode": "full",
|
| 107 |
"use_kv_bridge": false,
|
| 108 |
+
"kv_bridge_mode": "identity",
|
| 109 |
"kv_bridge_affine_stack_depth": 1,
|
| 110 |
"kv_bridge_adapter_expansion_factor": 1.0,
|
| 111 |
"kv_bridge_use_gate": false
|
| 112 |
},
|
| 113 |
"train_cfg": {
|
| 114 |
+
"lr_mp": 0.0005,
|
| 115 |
"lr_vision_backbone": 0.0,
|
| 116 |
+
"lr_language_backbone": 0.0005,
|
| 117 |
"lr_right_tower": 0.0,
|
| 118 |
"lr_kv_bridge": 0.0,
|
| 119 |
+
"batch_size": 32,
|
| 120 |
+
"gradient_accumulation_steps": 4,
|
| 121 |
"max_grad_norm": 1.0,
|
| 122 |
"max_training_steps": 20000,
|
| 123 |
"warmup_ratio": 0.03,
|
| 124 |
"stats_log_interval": 100,
|
| 125 |
"compile": false,
|
| 126 |
+
"precision": "bf16",
|
| 127 |
"eval_in_epochs": false,
|
| 128 |
"eval_interval": 500,
|
| 129 |
"use_packing": false,
|
model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:48293f1361ed7ff852009182d1a92785185d3c0be16d6d660929b9bb46d602a4
|
| 3 |
+
size 725315984
|
training_state.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8c4bf70bf25ecc2c705b99c4e4762056dc0a2c76d090df56db7aa654a579c7f8
|
| 3 |
+
size 553476607
|