Spaces:
Paused
Paused
"""Embed every LeetCode problem statement and pickle the resulting DataFrame.

The script loads the ``RayBernard/leetcode`` dataset, encodes the ``content``
column with a sentence-transformers model, stores one embedding vector per row
in a new ``embeddings`` column, and writes the DataFrame to
``leetcode_embeddings.pkl``.
"""

import pickle

import numpy as np
import pandas as pd
from datasets import load_dataset
from sentence_transformers import SentenceTransformer

DATASET_NAME = "RayBernard/leetcode"
MODEL_NAME = "all-MiniLM-L6-v2"
OUTPUT_PATH = "leetcode_embeddings.pkl"

# Load the LeetCode dataset
print("Loading dataset...")
dataset = load_dataset(DATASET_NAME, split="train")
df = pd.DataFrame(dataset)

# Initialize sentence transformer model
print("Initializing model...")
model = SentenceTransformer(MODEL_NAME)

# Prepare embeddings for the dataset.
# Encode the whole column in one call so the model can batch its inputs,
# instead of invoking it once per row through Series.apply.
print("Computing embeddings...")
embeddings = model.encode(df["content"].tolist(), show_progress_bar=True)
# Splitting the 2-D result along its first axis gives one 1-D vector per row,
# which is the same per-cell value a row-by-row encode() would store.
df["embeddings"] = list(embeddings)

# Save the dataframe with embeddings
print("Saving dataframe with embeddings...")
with open(OUTPUT_PATH, "wb") as f:
    pickle.dump(df, f)

print(f"Done! Embeddings saved to {OUTPUT_PATH}")