Update visual.py
visual.py
CHANGED
@@ -25,13 +25,11 @@ def sliding_window(matrix, window_size, stride):
     window_cols = (width - window_size[1]) // stride + 1
     images_448 = F.interpolate(matrix, size=window_size, mode='bicubic')
     windows = []
-    # pdb.set_trace()
     for i in range(window_rows):
         windows_col = []
         for j in range(window_cols):
             window = matrix[:,:, i*stride:i*stride+window_size[0], j*stride:j*stride+window_size[1]]
             windows.append(window)
-    # windows.append(windows_col)
     windows.append(images_448)
     images = torch.cat(windows,dim=1)
     images = images.reshape(b*5,c,window_size[0], window_size[0])
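For reference, a minimal self-contained sketch of what this helper does after the cleanup: a 2x2 grid of 448x448 tiles is cut from the input and a bicubically resized global view is appended, so each image becomes five crops. The 896x896 input size and the derivation of b, c, height, width from matrix.shape are assumptions inferred from the window size and the b*5 reshape, not taken verbatim from the file.

import torch
import torch.nn.functional as F

def sliding_window_sketch(matrix, window_size=(448, 448), stride=448):
    # matrix: (b, c, H, W); with H = W = 896 this yields a 2x2 grid of tiles
    b, c, height, width = matrix.shape
    window_rows = (height - window_size[0]) // stride + 1
    window_cols = (width - window_size[1]) // stride + 1
    # global view: the whole image resized down to a single window
    images_448 = F.interpolate(matrix, size=window_size, mode='bicubic')
    windows = []
    for i in range(window_rows):
        for j in range(window_cols):
            windows.append(matrix[:, :, i*stride:i*stride + window_size[0],
                                  j*stride:j*stride + window_size[1]])
    windows.append(images_448)          # 4 local tiles + 1 global view
    images = torch.cat(windows, dim=1)  # (b, 5*c, 448, 448)
    return images.reshape(b * 5, c, window_size[0], window_size[1])

# sliding_window_sketch(torch.randn(2, 3, 896, 896)).shape  -> torch.Size([10, 3, 448, 448])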
@@ -145,12 +143,9 @@ class Resampler(nn.Module):
         self.ln_kv = norm_layer(embed_dim)
 
         self.apply(self._init_weights)
-
-        #self.load_state_dict(torch.load('/cfs/cfs-lugcocyb/mingdali/code/qWen-VL/vl-chat/attn_params.pth'))
+
 
     def _init_weights(self, m):
-        # self.load_state_dict(torch.load('/cfs/cfs-lugcocyb/mingdali/code/qWen-VL/vl-chat/attn_params.pth'))
-        #pdb.set_trace()
         if isinstance(m, nn.Linear):
             trunc_normal_(m.weight, std=.02)
             if isinstance(m, nn.Linear) and m.bias is not None:
@@ -160,7 +155,6 @@ class Resampler(nn.Module):
             nn.init.constant_(m.weight, 1.0)
 
     def forward(self, x, attn_mask=None):
-        #pdb.set_trace()
         pos_embed = get_abs_pos(self.pos_embed, x.size(1))
 
         x = self.kv_proj(x)
@@ -401,7 +395,6 @@ class VisionTransformer(nn.Module):
             act_layer=act_layer,
             norm_layer=norm_layer,
         )
-        # pdb.set_trace()
         self.attn_pool = Resampler(
             grid_size=int(math.sqrt(n_queries)),
             embed_dim=output_dim,
@@ -418,14 +411,10 @@ class VisionTransformer(nn.Module):
         )
         self.ln_post = norm_layer(output_dim)
         self.proj = nn.Parameter((output_dim** -0.5) * torch.randn(output_dim, output_dim))
-        # self.attn_pool2.load_state_dict(torch.load('/cfs/cfs-lugcocyb/mingdali/code/qWen-VL/vl-chat/attn_params.pth'))
 
-    # def initialize_vision_modules(self,lpath):
-    #     self.attn_pool2[0].load_state_dict(torch.load(lpath))
 
     def forward(self, x: torch.Tensor):
-
-        #torch.save(self.attn_pool.state_dict(), '/cfs/cfs-lugcocyb/mingdali/code/qWen-VL/vl-chat/attn_params.pth')
+
         x = x.to(
             dtype=self.transformer.get_cast_dtype(),
             device=self.transformer.get_cast_device(),
@@ -442,7 +431,6 @@ class VisionTransformer(nn.Module):
         x = x.permute(1, 0, 2)  # NLD -> LND
         x = self.transformer(x)
         x = x.permute(1, 0, 2)  # LND -> NLD
-        # pdb.set_trace()
         src_size = int(math.sqrt(x.shape[1]))
         x = x.reshape(x.shape[0]//5,5,-1, x.shape[-1])
         x1 = x[:,4,:,:]
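For orientation, a hedged sketch of the view bookkeeping around this hunk: the batch enters the transformer as b*5 crops produced by sliding_window, and the reshape above regroups the tokens per original image so the global resized view (index 4) can be handled separately from the four local tiles. The example sizes below are assumptions for illustration, not values from the file.

import torch

b, L, D = 2, 1024, 1664                  # assumed: batch, tokens per crop, hidden dim
x = torch.randn(b * 5, L, D)             # transformer output, 5 views per image

x = x.reshape(x.shape[0] // 5, 5, -1, x.shape[-1])  # (b, 5, L, D)
x1 = x[:, 4, :, :]                                  # global 448-view tokens, (b, L, D)
tiles = x[:, :4, :, :]                              # the four local 448x448 tiles
# in the next hunk both branches pass through post_pro and are concatenated along dim=1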
@@ -454,7 +442,6 @@ class VisionTransformer(nn.Module):
         x1 = self.attn_pool(x1)
         x = self.post_pro(x)
         x1 = self.post_pro(x1)
-        # return x1
         return torch.cat([x,x1],dim=1)
 
     def post_pro(self, x):
@@ -465,7 +452,7 @@ class VisionTransformer(nn.Module):
 
     def encode(self, image_paths: List[str]):
         images = []
-
+
         for image_path in image_paths:
             try:
                 if image_path.startswith("http://") or image_path.startswith("https://"):
@@ -474,7 +461,6 @@ class VisionTransformer(nn.Module):
                 image = self.image_transform(Image.open(image_path).convert("RGB"))
             except:
                 image = torch.zeros((3, 448*2, 448*2))
-            # pdb.set_trace()
             images.append(image)
         images = torch.stack(images, dim=0)
         windows = sliding_window(images,window_size=(448,448),stride=448)
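Taken together, the encode path in these hunks reads images (local paths or http(s) URLs), falls back to a blank 896x896 tensor on failure, stacks the batch, and hands it to sliding_window. A rough usage sketch, assuming a hypothetical torchvision-style stand-in for self.image_transform (the real transform is defined elsewhere in the file and likely also normalizes):

from typing import List
import torch
from PIL import Image
from torchvision import transforms

# hypothetical stand-in for self.image_transform
image_transform = transforms.Compose([
    transforms.Resize((896, 896)),
    transforms.ToTensor(),
])

def encode_sketch(image_paths: List[str]) -> torch.Tensor:
    images = []
    for image_path in image_paths:
        try:
            # the real method also fetches http(s) URLs before opening
            image = image_transform(Image.open(image_path).convert("RGB"))
        except Exception:
            # unreadable inputs fall back to a blank 896x896 image
            image = torch.zeros((3, 448 * 2, 448 * 2))
        images.append(image)
    images = torch.stack(images, dim=0)  # (N, 3, 896, 896)
    # reuse the sliding_window helper from the top of visual.py:
    # four 448x448 tiles plus one resized global view per image
    return sliding_window(images, window_size=(448, 448), stride=448)  # (N*5, 3, 448, 448)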