WillyCodesInit committed on
Commit
0581902
·
verified ·
1 Parent(s): 17f5c65

Create utils.py

Browse files
Files changed (1) hide show
  1. utils.py +19 -0
utils.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from sentence_transformers import SentenceTransformer
2
+ import faiss
3
+ import pandas as pd
4
+ import numpy as np
5
+
6
def load_dataset(path):
    """Read the CSV file at *path* and return its contents as a DataFrame."""
    frame = pd.read_csv(path)
    return frame
8
+
9
def embed_questions(df, model_name="all-MiniLM-L6-v2"):
    """Embed the ``question`` column of *df* and index the vectors with FAISS.

    Args:
        df: DataFrame containing a ``question`` column of text to embed.
        model_name: Name of the SentenceTransformer model to load.

    Returns:
        A ``(model, index, embeddings)`` tuple: the loaded model, a
        ``faiss.IndexFlatL2`` holding one vector per row of *df* (added in
        row order), and the float32 embedding matrix that was indexed.

    Raises:
        KeyError: If *df* has no ``question`` column.
        ValueError: If the ``question`` column has no rows.
    """
    # Validate the input before loading the model so bad data fails fast
    # instead of after a potentially slow model load.
    questions = df["question"].tolist()
    if not questions:
        raise ValueError("cannot build an index from an empty 'question' column")

    model = SentenceTransformer(model_name)
    # FAISS expects a C-contiguous float32 matrix; coerce explicitly rather
    # than relying on whatever dtype/layout the encoder happens to return.
    embeddings = np.ascontiguousarray(
        model.encode(questions, show_progress_bar=True), dtype=np.float32
    )
    index = faiss.IndexFlatL2(embeddings.shape[1])
    index.add(embeddings)
    return model, index, embeddings
15
+
16
def retrieve_context(query, model, index, df, k=3):
    """Return the answers for the nearest indexed questions, newline-joined.

    Args:
        query: Text to look up.
        model: Encoder exposing ``encode(list_of_texts)`` that returns one
            vector per text.
        index: Search index exposing ``search(vectors, k)`` that returns a
            ``(distances, ids)`` pair.
        df: DataFrame with an ``answer`` column; ids returned by *index* are
            used as positional row numbers into it.
        k: Maximum number of neighbours to retrieve.

    Returns:
        The matched answers joined with ``"\\n"``, in the order the index
        returned them. Fewer than *k* answers are returned when the index
        yields fewer valid hits.
    """
    # FAISS searches float32 vectors; coerce so any encoder output works.
    query_vec = np.asarray(model.encode([query]), dtype=np.float32)
    _, neighbour_ids = index.search(query_vec, k)
    answers = df["answer"]
    # FAISS pads the id list with -1 when it finds fewer than k neighbours.
    # Passing -1 to iloc would silently return the last row as a bogus hit,
    # so drop those placeholders.
    return "\n".join(answers.iloc[int(i)] for i in neighbour_ids[0] if i >= 0)