File size: 757 Bytes
5c33ee6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
import pandas as pd
from datasets import load_dataset
from sentence_transformers import SentenceTransformer
import numpy as np
import pickle

# Script: embed the `content` column of the RayBernard/leetcode training split
# with the all-MiniLM-L6-v2 sentence-transformer model and pickle the resulting
# DataFrame (original columns plus an `embeddings` column) to
# leetcode_embeddings.pkl in the current working directory.

# Load the LeetCode dataset
print("Loading dataset...")
dataset = load_dataset("RayBernard/leetcode", split="train")
df = pd.DataFrame(dataset)

# Initialize sentence transformer model
print("Initializing model...")
model = SentenceTransformer('all-MiniLM-L6-v2')

# Prepare embeddings for the dataset
print("Computing embeddings...")
# Encode the whole column in a single call so the model can batch the texts
# internally, instead of running one forward pass per row via Series.apply.
# NOTE(review): batched encoding pads texts within a batch, so values may
# differ from per-row encoding at floating-point noise level -- confirm this
# is acceptable for downstream consumers.
content_embeddings = model.encode(df['content'].tolist())

# Store one 1-D vector per row (object column), matching the layout the
# per-row version produced; list() splits the 2-D result along its first axis.
df['embeddings'] = list(content_embeddings)

# Save the dataframe with embeddings
print("Saving dataframe with embeddings...")
with open('leetcode_embeddings.pkl', 'wb') as f:
    pickle.dump(df, f)

print("Done! Embeddings saved to leetcode_embeddings.pkl")