SENODROOM
fixed
beef333
Raw
History Blame Contribute Delete
932 Bytes
import torch
from transformers import AutoTokenizer, AutoModel
# Demo script: run one small code snippet through CodeBERT and report the
# shape of the tensor the encoder produces for it.

# 1. Loading Model
# from_pretrained() resolves the model id to the published tokenizer files and
# encoder weights (downloaded on first use, then read from the local cache).
print("Loading CodeBERT model... Please wait.")
model_name = "microsoft/codebert-base"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name)
print("Model loaded successfully!")

# 2. Sample Input
# This text is only printed and tokenized below; it is never executed, so the
# literal is kept exactly as written.
# NOTE(review): the function body inside the literal carries no indentation --
# confirm whether that is intentional or was lost when the file was pasted.
sample_code = """
def calculate_experience(repo_data):
stars = repo_data.get('stars', 0)
commits = repo_data.get('commits', 0)
return (stars * 10) + commits
"""
print(f"\nAnalyzing Sample Code Snippet:\n{sample_code}")

# 3. Tokenizing
# return_tensors="pt" asks for PyTorch tensors so the result can be unpacked
# straight into the model call; truncation=True caps over-long input at the
# tokenizer's maximum length.
inputs = tokenizer(sample_code, return_tensors="pt", padding=True, truncation=True)

# 4. Generating Embeddings
# Inference only: no_grad() skips gradient tracking for the forward pass.
with torch.no_grad():
    outputs = model(**inputs)

# 5. Extracting Vector (Last hidden state)
# Per the Transformers docs this is one vector per input token, i.e. a tensor
# of shape (batch_size, sequence_length, hidden_size).
embeddings = outputs.last_hidden_state

print("\nAI Pipeline Success!")
print(f"Vector Shape: {embeddings.shape}")
print("Meaning: CodeBERT successfully converted your code into heavy AI numbers!")