| import torch
|
| from transformers import AutoTokenizer, AutoModel
|
|
|
|
|
# Process-wide cache slots for the CodeBERT tokenizer and model. Both start
# out empty; get_codebert_model() fills them in the first time it is called.
TOKENIZER, MODEL = None, None
|
|
|
def get_codebert_model():
    """Return the shared CodeBERT ``(tokenizer, model)`` pair, loading it on first use.

    The pair is cached in the module-level ``TOKENIZER`` and ``MODEL``
    globals. If either one is still ``None``, both are loaded from the
    ``microsoft/codebert-base`` checkpoint and the globals are overwritten.

    Returns:
        tuple: ``(TOKENIZER, MODEL)`` as currently cached.
    """
    global TOKENIZER, MODEL

    cache_is_warm = TOKENIZER is not None and MODEL is not None
    if not cache_is_warm:
        checkpoint = "microsoft/codebert-base"
        print(" Loading CodeBERT pipeline into runtime architecture...")
        TOKENIZER = AutoTokenizer.from_pretrained(checkpoint)
        MODEL = AutoModel.from_pretrained(checkpoint)

    return TOKENIZER, MODEL
|
|
|
def embedding_node(state: dict) -> dict:
    """LangGraph node: run a code snippet through CodeBERT and record the output shape.

    Reads ``state["code_snippet"]`` (a built-in sample function is used when
    the key is absent), tokenizes it, runs a single forward pass, and writes
    two keys back into ``state``:

    * ``embedding_vector_shape`` -- shape of the first model output as a list
      of ints, or ``[]`` when the step failed.
    * ``embedding_status`` -- ``"SUCCESS"`` or ``"FAILED: <reason>"``.

    Args:
        state: Graph state dictionary. It is updated in place.

    Returns:
        The same ``state`` object with the two keys above set. Exceptions
        raised while loading or running the model are not propagated; they
        are reported through ``embedding_status`` instead.
    """
    print(" [Embedding Node] Initializing CodeBERT processor vector generation...")

    # Cap on the number of token ids sent to the encoder.
    # NOTE(review): 512 assumes the RoBERTa-base position table used by
    # microsoft/codebert-base -- confirm if the checkpoint ever changes.
    max_input_tokens = 512

    fallback_snippet = """
def calculate_experience(repo_data):
    stars = repo_data.get('stars', 0)
    commits = repo_data.get('commits', 0)
    return (stars * 10) + commits
"""
    sample_code = state.get("code_snippet", fallback_snippet)

    try:
        tokenizer, model = get_codebert_model()

        # NOTE(review): tokenize() + convert_tokens_to_ids() do not add the
        # tokenizer's special tokens; kept as-is so the reported sequence
        # length stays the same as before for inputs that already worked.
        code_tokens = tokenizer.tokenize(sample_code)
        tokens_ids = tokenizer.convert_tokens_to_ids(code_tokens)

        if not tokens_ids:
            # An empty id list would become a float tensor of shape (1, 0)
            # and fail inside the model with an unhelpful message.
            raise ValueError("code_snippet produced no tokens")

        if len(tokens_ids) > max_input_tokens:
            print(
                f" [Embedding Node] Truncating input from {len(tokens_ids)} "
                f"to {max_input_tokens} tokens"
            )
            tokens_ids = tokens_ids[:max_input_tokens]

        input_ids = torch.tensor([tokens_ids], dtype=torch.long)

        # Inference only: skip autograd bookkeeping for the forward pass.
        with torch.no_grad():
            context_embeddings = model(input_ids)[0]

        vector_shape = list(context_embeddings.shape)

        state["embedding_vector_shape"] = vector_shape
        state["embedding_status"] = "SUCCESS"
        print(f" [Embedding Node] Successfully created code dimensions vector: {vector_shape}")

    except Exception as e:
        # Node boundary: report the failure through the state rather than
        # raising, so the caller still gets both keys.
        state["embedding_status"] = f"FAILED: {str(e)}"
        state["embedding_vector_shape"] = []
        print(f" [Embedding Node] Core evaluation error: {str(e)}")

    return state
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Manual smoke test: push a small dummy state through the node and list
    # the keys present in the state that comes back.
    dummy_state = {
        "username": "test_user",
        "code_snippet": "print('Hello LangGraph World')",
    }
    print(" Testing Embedding Node locally with dummy state input...")
    result_state = embedding_node(dummy_state)
    result_keys = list(result_state.keys())
    print(f"Final State Output Keys: {result_keys}\n")