A well-performing two-layer NN for projection.

Trained with a custom N-pairs loss + hard negative mining.

margin = 1.0

Load the model with the following code:
```python
import torch
import torch.nn as nn
from transformers import AutoConfig, AutoTokenizer, AutoModel
from huggingface_hub import hf_hub_download
import json
from types import SimpleNamespace

# Model architecture - needed since this is a custom model
class ProjectionModel(nn.Module):
    """Dual-encoder projection model.

    Encodes C code and pseudocode with two independent CodeBERT encoders,
    then maps the pseudocode embedding through an MLP head into the same
    space as the C-code embedding.
    """

    def __init__(self, config):
        super().__init__()
        self.config = config
        # Separate encoder per modality; attribute names must stay as-is so
        # they line up with the checkpoint's state-dict keys.
        self.c_code_encoder = AutoModel.from_pretrained("microsoft/codebert-base")
        self.pseudocode_encoder = AutoModel.from_pretrained("microsoft/codebert-base")
        # MLP head: embedding_dim -> hidden_dim -> hidden_dim -> embedding_dim.
        # Kept as nn.Sequential so the saved layer indices (0, 2, 4) match.
        self.projection = nn.Sequential(
            nn.Linear(config.embedding_dim, config.hidden_dim),
            nn.ReLU(),
            nn.Linear(config.hidden_dim, config.hidden_dim),
            nn.ReLU(),
            nn.Linear(config.hidden_dim, config.embedding_dim),
        )

    def forward(self, c_code_inputs, pseudocode_inputs):
        """Return (c_code_embedding, projected_pseudocode_embedding).

        Each embedding is the mean over the token axis (dim=1) of the
        encoder's last hidden state; only the pseudocode side is projected.
        """
        code_repr = self.c_code_encoder(**c_code_inputs).last_hidden_state.mean(dim=1)
        pseudo_repr = self.pseudocode_encoder(**pseudocode_inputs).last_hidden_state.mean(dim=1)
        return code_repr, self.projection(pseudo_repr)
# --- Load the trained projection model + tokenizer from the Hugging Face Hub ---
model_name = "aircrypto/code-llama-7b-projection-largev2.11"

# The repo stores a plain JSON config; expose its keys as attributes.
config_file = hf_hub_download(repo_id=model_name, filename="config.json")
with open(config_file, "r") as f:
    config = SimpleNamespace(**json.load(f))

model = ProjectionModel(config)

model_path = hf_hub_download(repo_id=model_name, filename="pytorch_model.bin")
# weights_only=True restricts torch.load to tensor data, preventing arbitrary
# code execution from a malicious pickle (the checkpoint is a plain state dict).
state_dict = torch.load(model_path, map_location="cpu", weights_only=True)
model.load_state_dict(state_dict)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)
model.eval()  # inference mode: disables dropout in the CodeBERT encoders
print("Model loaded successfully!")

# Reuse model_name rather than repeating the repo id literal.
tokenizer = AutoTokenizer.from_pretrained(model_name)
print("Tokenizer loaded successfully!")
```