"""Smoke-test script: embed a small Python snippet with CodeBERT.

Loads the ``microsoft/codebert-base`` tokenizer and model, tokenizes one
sample function, runs a single forward pass without gradient tracking and
prints the shape of the resulting hidden-state tensor.
"""

import torch
from transformers import AutoTokenizer, AutoModel


print("Loading CodeBERT model... Please wait.")

# The first run fetches the weights from the Hugging Face Hub; later runs
# reuse the local cache.
model_name = "microsoft/codebert-base"
tokenizer = AutoTokenizer.from_pretrained(model_name)
# from_pretrained() hands the model back in evaluation mode, so no explicit
# model.eval() call is needed before inference.
model = AutoModel.from_pretrained(model_name)

print("Model loaded successfully!")

# Snippet to embed. The body is indented so that the text handed to the
# tokenizer is itself well-formed Python.
sample_code = """
def calculate_experience(repo_data):
    stars = repo_data.get('stars', 0)
    commits = repo_data.get('commits', 0)
    return (stars * 10) + commits
"""

print(f"\nAnalyzing Sample Code Snippet:\n{sample_code}")

# return_tensors="pt" yields PyTorch tensors; truncation=True clips input
# that exceeds the model's maximum length. padding=True has no effect for a
# single sequence but keeps the call valid if a list of snippets is passed.
inputs = tokenizer(sample_code, return_tensors="pt", padding=True, truncation=True)

# Inference only: skip autograd bookkeeping to save memory and time.
with torch.no_grad():
    outputs = model(**inputs)

# Per-token hidden states, laid out as (batch, sequence_length, hidden_size);
# batch is 1 here because a single string was tokenized.
embeddings = outputs.last_hidden_state

print("\nAI Pipeline Success!")
print(f"Vector Shape: {embeddings.shape}")
print("Meaning: CodeBERT successfully converted your code into heavy AI numbers!")