Spaces:
Sleeping
Sleeping
| import numpy as np | |
| import ast | |
| import faiss | |
| import pandas as pd | |
| from datasets import load_dataset | |
def load_index_once():
    """Load the embeddings dataset and build a FAISS inner-product index.

    Pulls the ``openai-embeddings`` revision of
    ``DvorakInnovationAI/rt-genai-dataset-v1``, converts its ``train`` split
    to a pandas DataFrame, and stacks the ``embeddings`` column into a single
    float32 matrix that is L2-normalised and added to a flat index.

    Returns:
        tuple: ``(df, embeddings, index)`` where ``df`` is the DataFrame of
        the ``train`` split (with the ``embeddings`` column parsed),
        ``embeddings`` is the normalised float32 matrix with one row per
        DataFrame row, and ``index`` is the ``faiss.IndexFlatIP`` holding
        those rows.
    """

    def _parse_cell(cell):
        # String cells hold a Python-literal rendering of the vector; any
        # other cell type is passed through untouched.
        if isinstance(cell, str):
            return ast.literal_eval(cell)
        return cell

    frame = load_dataset(
        "DvorakInnovationAI/rt-genai-dataset-v1",
        revision="openai-embeddings",
    )["train"].to_pandas()
    frame["embeddings"] = frame["embeddings"].apply(_parse_cell)

    # One row per record; FAISS works on float32 matrices.
    matrix = np.vstack(frame["embeddings"].values).astype("float32")

    # Normalisation happens in place; with unit-length rows the inner-product
    # scores returned by the index are cosine similarities.
    faiss.normalize_L2(matrix)

    flat_index = faiss.IndexFlatIP(matrix.shape[1])
    flat_index.add(matrix)

    return frame, matrix, flat_index
# Build the DataFrame, embedding matrix and FAISS index a single time when the
# script starts, so the module-level names below can be reused afterwards.
print('Loading Embeddings...........')
df, embeddings, index = load_index_once()