A well-performing two-layer NN for projection.

Trained with a custom N-pairs loss + hard negative mining.

margin = 1.0

Load the model with the following code:
```python
import torch
import torch.nn as nn
from transformers import AutoConfig, AutoTokenizer, AutoModel
from huggingface_hub import hf_hub_download
import json
from types import SimpleNamespace

# Model architecture - needed since this is a custom model
class ProjectionModel(nn.Module):
    """Dual-encoder projection model.

    Encodes C code and pseudocode with two independent CodeBERT encoders,
    then maps the pseudocode embedding through an MLP head into the same
    space as the C-code embedding.
    """

    def __init__(self, config):
        super().__init__()
        self.config = config
        # Separate encoder per modality; attribute names must stay as-is so
        # they line up with the checkpoint's state-dict keys.
        self.c_code_encoder = AutoModel.from_pretrained("microsoft/codebert-base")
        self.pseudocode_encoder = AutoModel.from_pretrained("microsoft/codebert-base")
        # MLP head: embedding_dim -> hidden_dim -> hidden_dim -> embedding_dim.
        # Kept as nn.Sequential so the saved layer indices (0, 2, 4) match.
        self.projection = nn.Sequential(
            nn.Linear(config.embedding_dim, config.hidden_dim),
            nn.ReLU(),
            nn.Linear(config.hidden_dim, config.hidden_dim),
            nn.ReLU(),
            nn.Linear(config.hidden_dim, config.embedding_dim),
        )

    def forward(self, c_code_inputs, pseudocode_inputs):
        """Return (c_code_embedding, projected_pseudocode_embedding).

        Each embedding is the mean over the token axis (dim=1) of the
        encoder's last hidden state; only the pseudocode side is projected.
        """
        code_repr = self.c_code_encoder(**c_code_inputs).last_hidden_state.mean(dim=1)
        pseudo_repr = self.pseudocode_encoder(**pseudocode_inputs).last_hidden_state.mean(dim=1)
        return code_repr, self.projection(pseudo_repr)
# --- Load the trained projection model + tokenizer from the Hugging Face Hub ---
model_name = "aircrypto/code-llama-7b-projection-largev2.11"

# The repo stores a plain JSON config; expose its keys as attributes.
config_file = hf_hub_download(repo_id=model_name, filename="config.json")
with open(config_file, "r") as f:
    config = SimpleNamespace(**json.load(f))

model = ProjectionModel(config)

model_path = hf_hub_download(repo_id=model_name, filename="pytorch_model.bin")
# weights_only=True restricts torch.load to tensor data, preventing arbitrary
# code execution from a malicious pickle (the checkpoint is a plain state dict).
state_dict = torch.load(model_path, map_location="cpu", weights_only=True)
model.load_state_dict(state_dict)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)
model.eval()  # inference mode: disables dropout in the CodeBERT encoders
print("Model loaded successfully!")

# Reuse model_name rather than repeating the repo id literal.
tokenizer = AutoTokenizer.from_pretrained(model_name)
print("Tokenizer loaded successfully!")
```