"""Precompute sentence embeddings for the LeetCode problem dataset.

Loads the ``train`` split of the ``RayBernard/leetcode`` dataset, encodes
the ``content`` column with the ``all-MiniLM-L6-v2`` sentence-transformer
model, stores one embedding vector per row in a new ``embeddings`` column,
and pickles the resulting DataFrame to ``leetcode_embeddings.pkl``.
"""
import pickle

import numpy as np
import pandas as pd
from datasets import load_dataset
from sentence_transformers import SentenceTransformer

DATASET_NAME = "RayBernard/leetcode"
MODEL_NAME = "all-MiniLM-L6-v2"
OUTPUT_PATH = "leetcode_embeddings.pkl"


def main():
    """Build the embeddings DataFrame and write it to ``OUTPUT_PATH``."""
    # Load the LeetCode dataset
    print("Loading dataset...")
    dataset = load_dataset(DATASET_NAME, split="train")
    df = pd.DataFrame(dataset)

    # Initialize sentence transformer model
    print("Initializing model...")
    model = SentenceTransformer(MODEL_NAME)

    # Prepare embeddings for the dataset.
    # Encode every text in one batched call instead of one model call per
    # row: the model batches internally, which avoids a separate forward
    # pass for each row.
    print("Computing embeddings...")
    texts = df["content"].tolist()
    embeddings = model.encode(texts, show_progress_bar=True)
    # Split the (n_rows, dim) result into one 1-D vector per row so the
    # column keeps the same per-cell layout as per-row encoding produced.
    df["embeddings"] = list(embeddings)

    # Save the dataframe with embeddings
    print("Saving dataframe with embeddings...")
    with open(OUTPUT_PATH, "wb") as f:
        pickle.dump(df, f)

    print("Done! Embeddings saved to leetcode_embeddings.pkl")


if __name__ == "__main__":
    main()