import torch
from transformers import AutoTokenizer, AutoModel

# Checkpoint name handed to both loaders below, so the tokenizer and the
# model always come from the same pretrained checkpoint.
model_name = "microsoft/codebert-base"

print("Loading CodeBERT model... Please wait.")

# Tokenizer is loaded first, then the model (tuple elements evaluate
# left to right).
tokenizer, model = (
    AutoTokenizer.from_pretrained(model_name),
    AutoModel.from_pretrained(model_name),
)

print("Model loaded successfully!")


# Demo snippet that gets embedded further down. It is only ever handled as
# text; the function inside it is never defined or called by this script.
# The explicit "\n" pieces make the leading and trailing newlines visible.
sample_code = (
    "\n"
    "def calculate_experience(repo_data):\n"
    "    stars = repo_data.get('stars', 0)\n"
    "    commits = repo_data.get('commits', 0)\n"
    "    return (stars * 10) + commits\n"
)

# Echo the snippet so the console output shows what is being analyzed.
print(f"\nAnalyzing Sample Code Snippet:\n{sample_code}")


# Tokenize the snippet and request PyTorch tensors ("pt") so the result can
# be unpacked straight into the model call.
inputs = tokenizer(
    sample_code,
    return_tensors="pt",
    padding=True,
    truncation=True,
)

# Forward pass without gradient tracking: this script only reads the
# activations and never trains.
with torch.no_grad():
    outputs = model(**inputs)
    # Activations of the final layer.
    # NOTE(review): presumably shaped (batch, tokens, hidden) -- confirm
    # against the shape printed below.
    embeddings = outputs.last_hidden_state

# Single print call; sep="\n" yields the same three lines as separate prints.
print(
    "\nAI Pipeline Success!",
    f"Vector Shape: {embeddings.shape}",
    "Meaning: CodeBERT successfully converted your code into heavy AI numbers!",
    sep="\n",
)