Clean checkpoint: fix naming, remove predictor keys, fix tie_word_embeddings
Browse files
- config.json +7 -6
- model.safetensors +2 -2
- modeling_smb_v1.py +9 -9
config.json
CHANGED
|
@@ -15,7 +15,7 @@
|
|
| 15 |
"lm_loss_weight": 0.5,
|
| 16 |
"mask_ratio": 0.5,
|
| 17 |
"max_target_length": 512,
|
| 18 |
-
"model_type": "
|
| 19 |
"momentum_tau": 0.996,
|
| 20 |
"num_queries": 128,
|
| 21 |
"num_resampler_layers": 3,
|
|
@@ -48,7 +48,7 @@
|
|
| 48 |
"rope_scaling": null,
|
| 49 |
"rope_theta": 1000000,
|
| 50 |
"sliding_window": null,
|
| 51 |
-
"tie_word_embeddings":
|
| 52 |
"torch_dtype": "bfloat16",
|
| 53 |
"use_cache": true,
|
| 54 |
"use_sliding_window": false,
|
|
@@ -2120,10 +2120,11 @@
|
|
| 2120 |
"vision_model_name_or_path2": "",
|
| 2121 |
"vocab_size": 151685,
|
| 2122 |
"architectures": [
|
| 2123 |
-
"
|
| 2124 |
],
|
| 2125 |
"auto_map": {
|
| 2126 |
-
"AutoConfig": "
|
| 2127 |
-
"AutoModelForCausalLM": "
|
| 2128 |
-
}
|
|
|
|
| 2129 |
}
|
|
|
|
| 15 |
"lm_loss_weight": 0.5,
|
| 16 |
"mask_ratio": 0.5,
|
| 17 |
"max_target_length": 512,
|
| 18 |
+
"model_type": "smb_v1",
|
| 19 |
"momentum_tau": 0.996,
|
| 20 |
"num_queries": 128,
|
| 21 |
"num_resampler_layers": 3,
|
|
|
|
| 48 |
"rope_scaling": null,
|
| 49 |
"rope_theta": 1000000,
|
| 50 |
"sliding_window": null,
|
| 51 |
+
"tie_word_embeddings": false,
|
| 52 |
"torch_dtype": "bfloat16",
|
| 53 |
"use_cache": true,
|
| 54 |
"use_sliding_window": false,
|
|
|
|
| 2120 |
"vision_model_name_or_path2": "",
|
| 2121 |
"vocab_size": 151685,
|
| 2122 |
"architectures": [
|
| 2123 |
+
"SMBV1ForConditionalGeneration"
|
| 2124 |
],
|
| 2125 |
"auto_map": {
|
| 2126 |
+
"AutoConfig": "modeling_smb_v1.SMBV1Config",
|
| 2127 |
+
"AutoModelForCausalLM": "modeling_smb_v1.SMBV1ForConditionalGeneration"
|
| 2128 |
+
},
|
| 2129 |
+
"tie_word_embeddings": false
|
| 2130 |
}
|
model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5dacf6293e6a37868a681daf134df81e6ce51b65d313c017f1512f85ec40332c
|
| 3 |
+
size 4061464080
|
modeling_smb_v1.py
CHANGED
|
@@ -402,8 +402,8 @@ def build_connector(config):
|
|
| 402 |
# CONFIGURATION
|
| 403 |
# =============================================================================
|
| 404 |
|
| 405 |
-
class
|
| 406 |
-
model_type = "
|
| 407 |
|
| 408 |
def __init__(
|
| 409 |
self,
|
|
@@ -475,11 +475,11 @@ class BioPANOmniConfig(PretrainedConfig):
|
|
| 475 |
# MAIN MODEL
|
| 476 |
# =============================================================================
|
| 477 |
|
| 478 |
-
class
|
| 479 |
-
config_class =
|
| 480 |
base_model_prefix = "model"
|
| 481 |
supports_gradient_checkpointing = True
|
| 482 |
-
_no_split_modules = ["
|
| 483 |
_skip_keys_device_placement = "past_key_values"
|
| 484 |
_supports_flash_attn_2 = True
|
| 485 |
|
|
@@ -493,8 +493,8 @@ class BioPANOmniPreTrainedModel(PreTrainedModel):
|
|
| 493 |
module.weight.data.normal_(mean=0.0, std=std)
|
| 494 |
|
| 495 |
|
| 496 |
-
class
|
| 497 |
-
def __init__(self, config:
|
| 498 |
super().__init__(config)
|
| 499 |
|
| 500 |
# Detect LLM backend from text_config
|
|
@@ -904,5 +904,5 @@ class BioPANOmniForConditionalGeneration(BioPANOmniPreTrainedModel, GenerationMi
|
|
| 904 |
# REGISTER WITH AUTO CLASSES
|
| 905 |
# =============================================================================
|
| 906 |
|
| 907 |
-
AutoConfig.register("
|
| 908 |
-
AutoModelForCausalLM.register(
|
|
|
|
| 402 |
# CONFIGURATION
|
| 403 |
# =============================================================================
|
| 404 |
|
| 405 |
+
class SMBV1Config(PretrainedConfig):
|
| 406 |
+
model_type = "smb_v1"
|
| 407 |
|
| 408 |
def __init__(
|
| 409 |
self,
|
|
|
|
| 475 |
# MAIN MODEL
|
| 476 |
# =============================================================================
|
| 477 |
|
| 478 |
+
class SMBV1PreTrainedModel(PreTrainedModel):
|
| 479 |
+
config_class = SMBV1Config
|
| 480 |
base_model_prefix = "model"
|
| 481 |
supports_gradient_checkpointing = True
|
| 482 |
+
_no_split_modules = ["SMBV1VisionAttention"]
|
| 483 |
_skip_keys_device_placement = "past_key_values"
|
| 484 |
_supports_flash_attn_2 = True
|
| 485 |
|
|
|
|
| 493 |
module.weight.data.normal_(mean=0.0, std=std)
|
| 494 |
|
| 495 |
|
| 496 |
+
class SMBV1ForConditionalGeneration(SMBV1PreTrainedModel, GenerationMixin):
|
| 497 |
+
def __init__(self, config: SMBV1Config):
|
| 498 |
super().__init__(config)
|
| 499 |
|
| 500 |
# Detect LLM backend from text_config
|
|
|
|
| 904 |
# REGISTER WITH AUTO CLASSES
|
| 905 |
# =============================================================================
|
| 906 |
|
| 907 |
+
AutoConfig.register("smb_v1", SMBV1Config)
|
| 908 |
+
AutoModelForCausalLM.register(SMBV1Config, SMBV1ForConditionalGeneration)
|