Change models.py
models.py (CHANGED)
@@ -3,8 +3,10 @@ import torch.nn as nn
 import torch.nn.functional as F
 import torch.distributed as dist

-from simcse.modeling_glm import GLMModel, GLMPreTrainedModel
-

 import transformers
 from transformers import RobertaTokenizer, AutoModel, PreTrainedModel
@@ -23,7 +25,7 @@ glm_model = None

 def init_glm(path):
     global glm_model
-    glm_model =
     for param in glm_model.parameters():
         param.requires_grad = False

@@ -129,9 +131,6 @@ def cl_forward(cls,
     return_dict=None,
     mlm_input_ids=None,
     mlm_labels=None,
-    left_emb=None,
-    right_emb=None,
-    kl_loss=False
 ):
     return_dict = return_dict if return_dict is not None else cls.config.use_return_dict
     ori_input_ids = input_ids
@@ -184,13 +183,29 @@ def cl_forward(cls,
     # If using "cls", we add an extra MLP layer
     # (same as BERT's original implementation) over the representation.
     if cls.pooler_type == "cls":
         pooler_output = cls.mlp(pooler_output)

     # Separate representation
     z1, z2 = pooler_output[:, 0], pooler_output[:, 1]

-
-

     # Hard negative
     if num_sent == 3:
@@ -219,45 +234,44 @@ def cl_forward(cls,
         # Get full batch embeddings: (bs x N, hidden)
         z1 = torch.cat(z1_list, 0)
         z2 = torch.cat(z2_list, 0)
-
-    mse_loss = F.mse_loss(z1, tensor_left) + F.mse_loss(z2, tensor_right)

-
-    #
-

     """
     this is KL div loss
     """
-
-
-
-
-    # OpenAI embeddings: giveMeMatrix returns the matrix product of the normalized left/right vectors
-    cos_sim_matrix_openai = simcse.mse_loss.giveMeMatrix(tensor_left, tensor_right)
-    beta_scaled_cos_sim_matrix_openai = beta * cos_sim_matrix_openai
-
-    # our embeddings: giveMeMatrix returns the matrix product of the normalized left/right vectors
-    cos_sim_matrix_data = simcse.mse_loss.giveMeMatrix(z1, z2)
-    beta_scaled_cos_sim_matrix_data = beta * cos_sim_matrix_data
-
-    beta_scaled_cos_sim_matrix_openai_vertical = beta_scaled_cos_sim_matrix_openai.softmax(dim=1)
-    beta_scaled_cos_sim_matrix_openai_horizontal = beta_scaled_cos_sim_matrix_openai.softmax(dim=0)
-
-    beta_scaled_cos_sim_matrix_data_vertical = beta_scaled_cos_sim_matrix_data.softmax(dim=1)
-    beta_scaled_cos_sim_matrix_data_horizontal = beta_scaled_cos_sim_matrix_data.softmax(dim=0)
-
-    # remove reduction="batchmean"
-    KL_vertical_loss = KL_loss(beta_scaled_cos_sim_matrix_data_vertical.log(), beta_scaled_cos_sim_matrix_openai_vertical)
-    KL_horizontal_loss = KL_loss(beta_scaled_cos_sim_matrix_data_horizontal.log(), beta_scaled_cos_sim_matrix_openai_horizontal)
-
-    KL_loss = (KL_vertical_loss + KL_horizontal_loss) / 2
-
-    # KL_row_loss = F.kl_div(softmax_row_model.log(), softmax_row, reduction='batchmean')
-    # KL_col_loss = F.kl_div(softmax_col_model.log(), softmax_col, reduction='batchmean')
-    # KL_loss = (KL_row_loss + KL_col_loss) / 2

-    ziang_loss = KL_loss +

     cos_sim = cls.sim(z1.unsqueeze(1), z2.unsqueeze(0))

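The block removed in this hunk scaled the reference (OpenAI) and model similarity matrices by a temperature beta before taking a softmax along each axis. A minimal illustration of that scaling step, assuming beta is a scalar hyperparameter defined elsewhere in the file (it does not appear in this hunk):

import torch

sim = torch.randn(32, 32)                  # stand-in for a 32 x 32 cosine-similarity matrix
beta = 20.0                                # temperature; larger beta gives sharper distributions
row_probs = (beta * sim).softmax(dim=1)    # per-row distribution over right-hand texts
col_probs = (beta * sim).softmax(dim=0)    # per-column distribution over left-hand texts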
@@ -292,10 +306,14 @@ def cl_forward(cls,
         output = (cos_sim,) + outputs[2:]
         return ((loss,) + output) if loss is not None else output

     return SequenceClassifierOutput(
         loss=ziang_loss,
         logits=cos_sim,
         hidden_states=outputs.hidden_states,
     )


@@ -378,8 +396,6 @@ class BertForCL(BertPreTrainedModel):
         sent_emb=False,
         mlm_input_ids=None,
         mlm_labels=None,
-        left_emb=None,
-        right_emb=None,
     ):
         if self.model_args.init_embeddings_model:
             input_ids_for_glm = input_ids.view((-1, input_ids.size(-1)))  # (bs * num_sent, len)
@@ -428,8 +444,6 @@ class BertForCL(BertPreTrainedModel):
             return_dict=return_dict,
             mlm_input_ids=mlm_input_ids,
             mlm_labels=mlm_labels,
-            left_emb=left_emb,
-            right_emb=right_emb,
         )


@@ -467,8 +481,6 @@ class RobertaForCL(RobertaPreTrainedModel):
         sent_emb=False,
         mlm_input_ids=None,
         mlm_labels=None,
-        left_emb=None,
-        right_emb=None,
     ):

         if self.model_args.init_embeddings_model and not sent_emb:
@@ -518,7 +530,5 @@ class RobertaForCL(RobertaPreTrainedModel):
             return_dict=return_dict,
             mlm_input_ids=mlm_input_ids,
             mlm_labels=mlm_labels,
-            left_emb=left_emb,
-            right_emb=right_emb,
         )

models.py after the change (added lines marked +):

@@ -3,8 +3,10 @@ import torch.nn as nn
 import torch.nn.functional as F
 import torch.distributed as dist

+# from simcse.modeling_glm import GLMModel, GLMPreTrainedModel
+
+# import simcse.readEmbeddings
+# import simcse.mse_loss

 import transformers
 from transformers import RobertaTokenizer, AutoModel, PreTrainedModel
@@ -23,7 +25,7 @@ glm_model = None

 def init_glm(path):
     global glm_model
+    glm_model = AutoModel.from_pretrained(path, trust_remote_code=True).to("cuda:0")
     for param in glm_model.parameters():
         param.requires_grad = False

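For context, a self-contained sketch of the pattern the new init_glm line uses: load a pretrained encoder through AutoModel and freeze it so it only supplies embeddings during training. The function name and device default below are illustrative, not part of the commit:

from transformers import AutoModel

def load_frozen_encoder(path, device="cuda:0"):
    # trust_remote_code is needed for checkpoints (e.g. GLM variants) that ship custom modeling code
    model = AutoModel.from_pretrained(path, trust_remote_code=True).to(device)
    for param in model.parameters():
        param.requires_grad = False   # frozen: gradients never flow into this encoder
    return model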
@@ -129,9 +131,6 @@ def cl_forward(cls,
     return_dict=None,
     mlm_input_ids=None,
     mlm_labels=None,
 ):
     return_dict = return_dict if return_dict is not None else cls.config.use_return_dict
     ori_input_ids = input_ids
@@ -184,13 +183,29 @@ def cl_forward(cls,
     # If using "cls", we add an extra MLP layer
     # (same as BERT's original implementation) over the representation.
     if cls.pooler_type == "cls":
+        # print("this pooler is cls and running mlp")
         pooler_output = cls.mlp(pooler_output)

     # Separate representation
     z1, z2 = pooler_output[:, 0], pooler_output[:, 1]

+    # simcse.mse_loss.global_num += 8
+    # print(simcse.mse_loss.global_num)
+    tensor_left, tensor_right = simcse.mse_loss.giveMeBatchEmbeddings(simcse.mse_loss.global_num,
+                                                                      simcse.readEmbeddings.data)
+    simcse.mse_loss.global_num += 32
+    # print(F.mse_loss(z1, tensor_left))
+    # print(F.mse_loss(z2, tensor_right))
+
+    # print(tensor_left.size())
+    # print(tensor_right.size())
+    # print(len(pooler_output[:,]))
+    # print(len(z1))
+    # print(len(z2))
+    # print(len(z1[0]))
+    # print(len(z2[0]))
+
+    # print(F.mse_loss(z1[0], z2[0]))

     # Hard negative
     if num_sent == 3:
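simcse.mse_loss.giveMeBatchEmbeddings and simcse.readEmbeddings.data are not shown in this diff; the call pattern and the global_num += 32 bookkeeping suggest the helper slices the next 32 precomputed reference embeddings so they stay aligned with the current batch. A hypothetical sketch of such a helper, with an assumed layout for data:

import torch

def give_me_batch_embeddings(offset, data, batch_size=32, device="cuda:0"):
    # data is assumed to hold two parallel lists of precomputed embedding vectors,
    # one for the "left" sentence and one for the "right" sentence of each pair.
    left = torch.tensor(data["left"][offset:offset + batch_size], device=device)
    right = torch.tensor(data["right"][offset:offset + batch_size], device=device)
    return left, right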
@@ -219,45 +234,44 @@ def cl_forward(cls,
         # Get full batch embeddings: (bs x N, hidden)
         z1 = torch.cat(z1_list, 0)
         z2 = torch.cat(z2_list, 0)

+    ziang_loss = F.mse_loss(z1, tensor_left) + F.mse_loss(z2, tensor_right)
+    # print("\n MSE Loss is : ", ziang_loss)
+
+    softmax_row, softmax_col = simcse.mse_loss.giveMeMatrix(tensor_left, tensor_right)
+    softmax_row_model, softmax_col_model = simcse.mse_loss.giveMeMatrix(z1, z2)
+
+    ziang_labels = torch.tensor([i for i in range(32)], device='cuda:0')
+
+    """
+    this is cross entropy loss
+    """
+    row_loss = F.cross_entropy(softmax_row, ziang_labels)
+    col_loss = F.cross_entropy(softmax_col, ziang_labels)
+    softmax_loss = (row_loss + col_loss) / 2

     """
     this is KL div loss
     """
+    KL_row_loss = F.kl_div(softmax_row_model.log(), softmax_row, reduction='batchmean')
+    KL_col_loss = F.kl_div(softmax_col_model.log(), softmax_col, reduction='batchmean')
+    KL_loss = (KL_row_loss + KL_col_loss) / 2

+    ziang_loss = KL_loss + ziang_loss + softmax_loss
+    # ziang_loss = softmax_loss + ziang_loss
+
+    # ziang_loss = F.mse_loss(
+    #     torch.nn.functional.cosine_similarity(tensor_left, tensor_right),
+    #     torch.nn.functional.cosine_similarity(z1, z2)
+    # )
+    # ziang_loss /= 0.5
+    # print("\n Softmax Loss is : ", softmax_loss)
+    # print("\n Openai Cos Similarity between two paragraph: \n", torch.nn.functional.cosine_similarity(tensor_left, tensor_right))
+    # print("\nCos Similarity between two paragraph: \n", torch.nn.functional.cosine_similarity(z1, z2))
+    # print("\n My total loss currently: ", ziang_loss)
+
+    # print(z1.size())
+    # print(z2.size())

     cos_sim = cls.sim(z1.unsqueeze(1), z2.unsqueeze(0))

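giveMeMatrix is also defined outside this diff; from its use here it appears to return row- and column-normalized views of the batch cosine-similarity matrix, which feed an InfoNCE-style cross-entropy term (diagonal labels, as in the hard-coded range(32)) and a KL term that distills the reference similarity distribution into the model's. A self-contained sketch of both terms under those assumptions; every name below is illustrative rather than taken from the repository:

import torch
import torch.nn.functional as F

def similarity_matrix(left, right):
    # (batch x batch) cosine-similarity matrix between every left/right pair
    return F.normalize(left, dim=-1) @ F.normalize(right, dim=-1).T

def contrastive_ce(sim):
    # InfoNCE-style term: the i-th left text should match the i-th right text.
    # F.cross_entropy applies log_softmax itself, so it takes the raw similarities.
    labels = torch.arange(sim.size(0), device=sim.device)
    return (F.cross_entropy(sim, labels) + F.cross_entropy(sim.T, labels)) / 2

def distillation_kl(student_sim, teacher_sim):
    # Match the model's row/column similarity distributions to the reference ones.
    # F.kl_div expects log-probabilities as input and probabilities as target.
    kl_row = F.kl_div(student_sim.log_softmax(dim=1), teacher_sim.softmax(dim=1), reduction="batchmean")
    kl_col = F.kl_div(student_sim.log_softmax(dim=0), teacher_sim.softmax(dim=0), reduction="batchmean")
    return (kl_row + kl_col) / 2

The new ziang_loss then sums the MSE alignment term, the cross-entropy term, and the KL term with equal weight.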
@@ -292,10 +306,14 @@ def cl_forward(cls,
         output = (cos_sim,) + outputs[2:]
         return ((loss,) + output) if loss is not None else output

+    # print("original " , loss)
+
     return SequenceClassifierOutput(
+        # loss=loss,
         loss=ziang_loss,
         logits=cos_sim,
         hidden_states=outputs.hidden_states,
+        # attentions=outputs.attentions,
     )


@@ -378,8 +396,6 @@ class BertForCL(BertPreTrainedModel):
         sent_emb=False,
         mlm_input_ids=None,
         mlm_labels=None,
     ):
         if self.model_args.init_embeddings_model:
             input_ids_for_glm = input_ids.view((-1, input_ids.size(-1)))  # (bs * num_sent, len)
@@ -428,8 +444,6 @@ class BertForCL(BertPreTrainedModel):
             return_dict=return_dict,
             mlm_input_ids=mlm_input_ids,
             mlm_labels=mlm_labels,
         )


@@ -467,8 +481,6 @@ class RobertaForCL(RobertaPreTrainedModel):
         sent_emb=False,
         mlm_input_ids=None,
         mlm_labels=None,
     ):

         if self.model_args.init_embeddings_model and not sent_emb:
@@ -518,7 +530,5 @@ class RobertaForCL(RobertaPreTrainedModel):
             return_dict=return_dict,
             mlm_input_ids=mlm_input_ids,
             mlm_labels=mlm_labels,
         )
