huu-ontocord committed on
Commit
bf135db
·
verified ·
1 Parent(s): a7bfe63

Update seed2_tokenizer.py

Browse files
Files changed (1) hide show
  1. seed2_tokenizer.py +5 -5
seed2_tokenizer.py CHANGED
@@ -1654,7 +1654,7 @@ class Blip2Base(PreTrainedModel):
1654
 
1655
  @classmethod
1656
  def init_Qformer(cls, encoder_config, num_query_token, vision_width, cross_attention_freq=2, cache_dir=""):
1657
- #print ("loading")
1658
  encoder_config = BertConfig.from_pretrained("bert-base-uncased")
1659
  encoder_config.encoder_width = vision_width
1660
  # insert cross-attention layer every other block
@@ -2091,10 +2091,11 @@ class Seed2Tokenizer(PreTrainedModel):
2091
  ])
2092
 
2093
  shape_latents = torch.Size([1, 4, 96, 96])
2094
- self.latents = torch.randn(shape_latents, generator=None, layout=torch.strided)
 
2095
 
2096
  shape_noise = torch.Size([1, 1024])
2097
- self.noise = torch.randn(shape_noise, generator=None, layout=torch.strided)
2098
 
2099
  self.model = model
2100
  self.processor = processor
@@ -2183,5 +2184,4 @@ class Seed2Tokenizer(PreTrainedModel):
2183
 
2184
  image_torch = image_torch.to(self.device)
2185
  return self.encode(image_torch, visual_encoder)
2186
-
2187
-
 
1654
 
1655
  @classmethod
1656
  def init_Qformer(cls, encoder_config, num_query_token, vision_width, cross_attention_freq=2, cache_dir=""):
1657
+ print ("loading")
1658
  encoder_config = BertConfig.from_pretrained("bert-base-uncased")
1659
  encoder_config.encoder_width = vision_width
1660
  # insert cross-attention layer every other block
 
2091
  ])
2092
 
2093
  shape_latents = torch.Size([1, 4, 96, 96])
2094
+ self.register_buffer("latents",torch.randn(shape_latents, generator=None, layout=torch.strided))
2095
+
2096
 
2097
  shape_noise = torch.Size([1, 1024])
2098
+ self.register_buffer("noise",torch.randn(shape_noise, generator=None, layout=torch.strided))
2099
 
2100
  self.model = model
2101
  self.processor = processor
 
2184
 
2185
  image_torch = image_torch.to(self.device)
2186
  return self.encode(image_torch, visual_encoder)
2187
+