# query / compute.py
# poemsforaphrodite's picture
# Upload folder using huggingface_hub
# 5c33ee6 verified
import pandas as pd
from datasets import load_dataset
from sentence_transformers import SentenceTransformer
import numpy as np
import pickle
# Build sentence embeddings for every problem in the LeetCode dataset and
# pickle the resulting DataFrame for later similarity queries.

# Load the LeetCode dataset
print("Loading dataset...")
dataset = load_dataset("RayBernard/leetcode", split="train")
df = pd.DataFrame(dataset)

# Initialize sentence transformer model
print("Initializing model...")
model = SentenceTransformer('all-MiniLM-L6-v2')

# Prepare embeddings for the dataset.
# Encode the whole column in one call so the model can batch the inputs
# internally instead of running one forward pass per row. The result is a
# 2-D array with one row per input text; splitting it into a list of 1-D
# arrays keeps the column layout the per-row version produced (one vector
# per cell).
# NOTE(review): batched encoding should match per-row encoding up to
# floating-point noise — confirm if bit-exact reproducibility matters.
print("Computing embeddings...")
embeddings = model.encode(df['content'].tolist())
df['embeddings'] = list(embeddings)

# Save the dataframe with embeddings.
# NOTE: pickle files must only be loaded from trusted sources.
print("Saving dataframe with embeddings...")
with open('leetcode_embeddings.pkl', 'wb') as f:
    pickle.dump(df, f)

print("Done! Embeddings saved to leetcode_embeddings.pkl")