Spaces:
Runtime error
Runtime error
File size: 907 Bytes
import torch
from transformers import AutoTokenizer, AutoModel
# Hugging Face checkpoint that provides both the tokenizer and the encoder.
MODEL_NAME = "microsoft/unixcoder-base"

# Use the GPU when CUDA reports one, otherwise stay on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# Load the weights, move them to the selected device, and switch to
# evaluation mode; both ``to`` and ``eval`` return the module itself.
unix_model = AutoModel.from_pretrained(MODEL_NAME).to(device).eval()
def get_unixcoder_embedding(code, max_length=512):
    """Embed source code with the module-level UniXcoder model.

    The input is tokenized (truncated to ``max_length`` tokens and padded to
    the longest sequence in the call), run through ``unix_model`` without
    gradient tracking, and pooled two ways: the last-layer hidden state of
    the first token, and the mean of the last-layer hidden states over the
    tokens. The two vectors are concatenated along the feature axis.

    Args:
        code: Text handed straight to ``tokenizer``; presumably one snippet
            or a list of snippets -- confirm against callers.
        max_length: Maximum number of tokens kept per sequence.

    Returns:
        A flattened 1-D NumPy array with the concatenated first-token and
        mean-pooled embeddings. If several sequences are encoded in one call,
        ``flatten()`` lays their rows end to end in a single vector.
    """
    encoded = tokenizer(
        code,
        padding=True,
        truncation=True,
        max_length=max_length,
        return_tensors="pt",
    )
    encoded = {name: tensor.to(device) for name, tensor in encoded.items()}

    with torch.no_grad():
        outputs = unix_model(**encoded)
        last_hidden = outputs.last_hidden_state
        cls_embedding = last_hidden[:, 0, :]

        attention_mask = encoded.get("attention_mask")
        if attention_mask is None:
            # No mask available: every position counts as a real token.
            mean_embedding = last_hidden.mean(dim=1)
        else:
            # Average over real tokens only. With padding=True, shorter
            # sequences in a batch are padded, and a plain mean over the
            # sequence axis would mix those padding positions into the
            # embedding.
            weights = attention_mask.unsqueeze(-1).to(last_hidden.dtype)
            # clamp guards against division by zero for an all-padding row.
            token_counts = weights.sum(dim=1).clamp(min=1)
            mean_embedding = (last_hidden * weights).sum(dim=1) / token_counts

        combined = torch.cat((cls_embedding, mean_embedding), dim=1)

    return combined.cpu().numpy().flatten()