File size: 700 Bytes
b4fb6ac
 
 
 
6c655a3
b4fb6ac
 
 
6c655a3
 
 
b4fb6ac
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
import numpy as np
import ast
import faiss
import pandas as pd
from datasets import load_dataset


def load_index_once(
    dataset_name: str = "DvorakInnovationAI/rt-genai-dataset-v1",
    revision: str = "openai-embeddings",
    split: str = "train",
):
    """Load a dataset of precomputed embeddings and build a FAISS index over it.

    The chosen split is converted to a pandas DataFrame, its ``embeddings``
    column is turned into a float32 matrix, every row is L2-normalised, and
    the rows are added to a flat inner-product index. With unit-length rows
    the inner product equals cosine similarity.

    Args:
        dataset_name: Dataset identifier passed to ``load_dataset``.
        revision: Dataset revision passed to ``load_dataset``.
        split: Name of the split to read from the loaded dataset.

    Returns:
        A ``(df, embeddings, index)`` tuple:
        ``df`` is the split as a DataFrame (with ``embeddings`` parsed),
        ``embeddings`` is the float32 matrix *after* in-place L2
        normalisation, and ``index`` is the ``faiss.IndexFlatIP`` holding
        those rows in DataFrame order.

    Raises:
        ValueError: If the split contains no rows, or if the embedding rows
            cannot be stacked into a single matrix.
    """
    dataset = load_dataset(dataset_name, revision=revision)
    df = dataset[split].to_pandas()

    if df.empty:
        # np.vstack would fail here with a far less helpful message.
        raise ValueError(
            f"Split {split!r} of dataset {dataset_name!r} "
            f"(revision {revision!r}) contains no rows to index."
        )

    # Rows may hold the vector as a string literal (e.g. "[0.1, 0.2]") or as
    # an already-materialised sequence; only strings need parsing.
    # literal_eval is used rather than eval so only Python literals are accepted.
    df['embeddings'] = df['embeddings'].apply(
        lambda x: ast.literal_eval(x) if isinstance(x, str) else x
    )

    embeddings = np.vstack(df['embeddings'].values).astype('float32')

    # normalize_L2 works in place, so the returned matrix is the normalised one.
    faiss.normalize_L2(embeddings)

    index = faiss.IndexFlatIP(embeddings.shape[1])
    index.add(embeddings)
    return df, embeddings, index

# Announce the load before it starts.
print("Loading Embeddings...........")

# Executed at module top level: binds the DataFrame, the normalised embedding
# matrix and the FAISS index as module-level names.
df, embeddings, index = load_index_once()