jingyaogong committed on
Commit
86cffcc
·
verified ·
1 Parent(s): 2860e7c

Upload 9 files

Browse files
minimind-3v/config.json CHANGED
@@ -38,5 +38,6 @@
38
  "router_aux_loss_coef": 0.0005,
39
  "transformers_version": "4.57.6",
40
  "use_moe": false,
41
- "vocab_size": 6400
42
- }
 
 
38
  "router_aux_loss_coef": 0.0005,
39
  "transformers_version": "4.57.6",
40
  "use_moe": false,
41
+ "vocab_size": 6400,
42
+ "tie_word_embeddings": true
43
+ }
minimind-3v/model_vlm.py CHANGED
@@ -21,28 +21,22 @@ class VLMConfig(MiniMindConfig):
21
  super().__init__(**kwargs)
22
 
23
  class MMVisionProjector(nn.Module):
24
- def __init__(self, in_dim, out_dim, source_tokens=256, target_tokens=64):
25
  super().__init__()
26
- self.target_tokens = target_tokens
27
- self.merge = source_tokens // target_tokens
28
  self.mlp = nn.Sequential(
29
- nn.LayerNorm(in_dim * self.merge),
30
- nn.Linear(in_dim * self.merge, out_dim),
31
  nn.GELU(),
32
  nn.Linear(out_dim, out_dim),
33
  )
34
  def forward(self, x):
35
- b, n, d = x.shape
36
- side = int(n ** 0.5)
37
- s = int(self.merge ** 0.5)
38
- x = x.view(b, side // s, s, side // s, s, d).permute(0, 1, 3, 2, 4, 5).reshape(b, self.target_tokens, d * self.merge)
39
  return self.mlp(x)
40
 
41
  # 继承自语言模型
42
  class MiniMindVLM(MiniMindForCausalLM):
43
  config_class = VLMConfig
44
 
45
- def __init__(self, config: VLMConfig = None, vision_model_path="./model/siglip2-base-p16-256-ve"):
46
  self.config = config or VLMConfig()
47
  super().__init__(self.config)
48
  self.vision_encoder, self.processor = self.__class__.get_vision_model(vision_model_path)
 
21
  super().__init__(**kwargs)
22
 
23
  class MMVisionProjector(nn.Module):
24
+ def __init__(self, in_dim, out_dim, source_tokens=64, target_tokens=64):
25
  super().__init__()
 
 
26
  self.mlp = nn.Sequential(
27
+ nn.LayerNorm(in_dim),
28
+ nn.Linear(in_dim, out_dim),
29
  nn.GELU(),
30
  nn.Linear(out_dim, out_dim),
31
  )
32
  def forward(self, x):
 
 
 
 
33
  return self.mlp(x)
34
 
35
  # 继承自语言模型
36
  class MiniMindVLM(MiniMindForCausalLM):
37
  config_class = VLMConfig
38
 
39
+ def __init__(self, config: VLMConfig = None, vision_model_path="./model/siglip2-base-p32-256-ve"):
40
  self.config = config or VLMConfig()
41
  super().__init__(self.config)
42
  self.vision_encoder, self.processor = self.__class__.get_vision_model(vision_model_path)
minimind-3v/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:baf6028706a526a5430de8ebf0c243a95ab8b318d668d5f4d91c1f5a1e55a760
3
+ size 130220750