irsyad-smb committed on
Commit
26056d6
·
verified ·
1 Parent(s): 521c015

Clean checkpoint: fix naming, remove predictor keys, fix tie_word_embeddings

Browse files
Files changed (3) hide show
  1. config.json +7 -6
  2. model.safetensors +2 -2
  3. modeling_smb_v1.py +9 -9
config.json CHANGED
@@ -15,7 +15,7 @@
15
  "lm_loss_weight": 0.5,
16
  "mask_ratio": 0.5,
17
  "max_target_length": 512,
18
- "model_type": "biopan_omni",
19
  "momentum_tau": 0.996,
20
  "num_queries": 128,
21
  "num_resampler_layers": 3,
@@ -48,7 +48,7 @@
48
  "rope_scaling": null,
49
  "rope_theta": 1000000,
50
  "sliding_window": null,
51
- "tie_word_embeddings": true,
52
  "torch_dtype": "bfloat16",
53
  "use_cache": true,
54
  "use_sliding_window": false,
@@ -2120,10 +2120,11 @@
2120
  "vision_model_name_or_path2": "",
2121
  "vocab_size": 151685,
2122
  "architectures": [
2123
- "BioPANOmniForConditionalGeneration"
2124
  ],
2125
  "auto_map": {
2126
- "AutoConfig": "modeling_biopan_omni.BioPANOmniConfig",
2127
- "AutoModelForCausalLM": "modeling_biopan_omni.BioPANOmniForConditionalGeneration"
2128
- }
 
2129
  }
 
15
  "lm_loss_weight": 0.5,
16
  "mask_ratio": 0.5,
17
  "max_target_length": 512,
18
+ "model_type": "smb_v1",
19
  "momentum_tau": 0.996,
20
  "num_queries": 128,
21
  "num_resampler_layers": 3,
 
48
  "rope_scaling": null,
49
  "rope_theta": 1000000,
50
  "sliding_window": null,
51
+ "tie_word_embeddings": false,
52
  "torch_dtype": "bfloat16",
53
  "use_cache": true,
54
  "use_sliding_window": false,
 
2120
  "vision_model_name_or_path2": "",
2121
  "vocab_size": 151685,
2122
  "architectures": [
2123
+ "SMBV1ForConditionalGeneration"
2124
  ],
2125
  "auto_map": {
2126
+ "AutoConfig": "modeling_smb_v1.SMBV1Config",
2127
+ "AutoModelForCausalLM": "modeling_smb_v1.SMBV1ForConditionalGeneration"
2128
+ },
2129
+ "tie_word_embeddings": false
2130
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fb814b1a4d22b0ad10d5f923e1f1995d3d5316db6cd58f2fc880e6234209f0f8
3
- size 3658396544
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5dacf6293e6a37868a681daf134df81e6ce51b65d313c017f1512f85ec40332c
3
+ size 4061464080
modeling_smb_v1.py CHANGED
@@ -402,8 +402,8 @@ def build_connector(config):
402
  # CONFIGURATION
403
  # =============================================================================
404
 
405
- class BioPANOmniConfig(PretrainedConfig):
406
- model_type = "biopan_omni"
407
 
408
  def __init__(
409
  self,
@@ -475,11 +475,11 @@ class BioPANOmniConfig(PretrainedConfig):
475
  # MAIN MODEL
476
  # =============================================================================
477
 
478
- class BioPANOmniPreTrainedModel(PreTrainedModel):
479
- config_class = BioPANOmniConfig
480
  base_model_prefix = "model"
481
  supports_gradient_checkpointing = True
482
- _no_split_modules = ["BioPANOmniVisionAttention"]
483
  _skip_keys_device_placement = "past_key_values"
484
  _supports_flash_attn_2 = True
485
 
@@ -493,8 +493,8 @@ class BioPANOmniPreTrainedModel(PreTrainedModel):
493
  module.weight.data.normal_(mean=0.0, std=std)
494
 
495
 
496
- class BioPANOmniForConditionalGeneration(BioPANOmniPreTrainedModel, GenerationMixin):
497
- def __init__(self, config: BioPANOmniConfig):
498
  super().__init__(config)
499
 
500
  # Detect LLM backend from text_config
@@ -904,5 +904,5 @@ class BioPANOmniForConditionalGeneration(BioPANOmniPreTrainedModel, GenerationMi
904
  # REGISTER WITH AUTO CLASSES
905
  # =============================================================================
906
 
907
- AutoConfig.register("biopan_omni", BioPANOmniConfig)
908
- AutoModelForCausalLM.register(BioPANOmniConfig, BioPANOmniForConditionalGeneration)
 
402
  # CONFIGURATION
403
  # =============================================================================
404
 
405
+ class SMBV1Config(PretrainedConfig):
406
+ model_type = "smb_v1"
407
 
408
  def __init__(
409
  self,
 
475
  # MAIN MODEL
476
  # =============================================================================
477
 
478
+ class SMBV1PreTrainedModel(PreTrainedModel):
479
+ config_class = SMBV1Config
480
  base_model_prefix = "model"
481
  supports_gradient_checkpointing = True
482
+ _no_split_modules = ["SMBV1VisionAttention"]
483
  _skip_keys_device_placement = "past_key_values"
484
  _supports_flash_attn_2 = True
485
 
 
493
  module.weight.data.normal_(mean=0.0, std=std)
494
 
495
 
496
+ class SMBV1ForConditionalGeneration(SMBV1PreTrainedModel, GenerationMixin):
497
+ def __init__(self, config: SMBV1Config):
498
  super().__init__(config)
499
 
500
  # Detect LLM backend from text_config
 
904
  # REGISTER WITH AUTO CLASSES
905
  # =============================================================================
906
 
907
+ AutoConfig.register("smb_v1", SMBV1Config)
908
+ AutoModelForCausalLM.register(SMBV1Config, SMBV1ForConditionalGeneration)