Spaces:
Sleeping
Sleeping
File size: 700 Bytes
b4fb6ac 6c655a3 b4fb6ac 6c655a3 b4fb6ac |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 |
import numpy as np
import ast
import faiss
import pandas as pd
from datasets import load_dataset
def load_index_once():
    """Load the embeddings dataset and build an inner-product FAISS index.

    Fetches the ``openai-embeddings`` revision of
    ``DvorakInnovationAI/rt-genai-dataset-v1``, converts its ``train`` split
    to a pandas DataFrame, and parses the ``embeddings`` column (string
    entries are evaluated as Python literals; anything else is kept as-is).

    Returns:
        A ``(df, embeddings, index)`` tuple where ``df`` is the DataFrame,
        ``embeddings`` is the stacked float32 matrix after L2 normalization,
        and ``index`` is a ``faiss.IndexFlatIP`` containing those rows.
    """

    def _parse_embedding(value):
        # Embeddings stored as text are literal-evaluated; other values
        # are passed through unchanged.
        if isinstance(value, str):
            return ast.literal_eval(value)
        return value

    hf_dataset = load_dataset(
        "DvorakInnovationAI/rt-genai-dataset-v1",
        revision="openai-embeddings",
    )
    frame = hf_dataset["train"].to_pandas()
    frame['embeddings'] = frame['embeddings'].apply(_parse_embedding)

    # One row per record, cast to float32 before handing the matrix to FAISS.
    matrix = np.vstack(frame['embeddings'].values).astype('float32')

    # The return value is not used: the call is relied on to normalize
    # ``matrix`` in place. With unit-length rows, inner-product scores
    # equal cosine similarity.
    faiss.normalize_L2(matrix)

    dimension = matrix.shape[1]
    flat_index = faiss.IndexFlatIP(dimension)
    flat_index.add(matrix)
    return frame, matrix, flat_index
print('Loading Embeddings...........')
# Load once on script start; the DataFrame, the normalized embedding matrix
# and the FAISS index are bound to module-level names.
df, embeddings, index = load_index_once()