# duabymoon/model/model.py
# Commit 32a7aab — "created a new working endpoint" (JibexBanks)
from sentence_transformers import SentenceTransformer
import numpy as np
import faiss
import json
# Build a FAISS cosine-similarity index over the dua lookup keys and
# persist it to disk for the search endpoint to load.
#
# Note: the `global dua_data` statement the original had here was a no-op
# (module level IS the global scope) and has been removed.

# dua_keys.json maps full dua text -> dua id (see the generator note below);
# the keys are the texts we embed.  JSON is text, so open in text mode with
# an explicit encoding rather than 'rb'.
with open("json/dua_keys.json", "r", encoding="utf-8") as fl:
    dua_data = json.load(fl)

# Iterating a dict yields its keys; no need for an explicit .keys() loop.
keys2 = list(dua_data)

model = SentenceTransformer(
    "sentence-transformers/all-MiniLM-L6-v2",
    cache_folder="./all-MiniLM-L6-v2",
)

# encode() returns a (num_keys, dim) float array of sentence embeddings.
embeddings = model.encode(keys2)
dimension = embeddings.shape[1]

# L2-normalize each row so that inner product == cosine similarity,
# which is what IndexFlatIP computes.
embeddings = embeddings / np.linalg.norm(embeddings, axis=1, keepdims=True)

Index = faiss.IndexFlatIP(dimension)
Index.add(embeddings)  # type: ignore
faiss.write_index(Index, "./model/dua_vector.faiss")
"""
USAGE
def search(question,top_k=1):
question_embeddings = model.encode([question])
question_embeddings = question_embeddings / np.linalg.norm(question_embeddings,axis=1,keepdims=True)
distances,indices = Index.search(np.array(question_embeddings),top_k)
results = [(keys2[i], distances[0][pos]) for pos,i in enumerate(indices[0])]
return results
question = input("Question: ")
print(search(question))
"""
"""
# I used this to generate the new keys
import json
with open("json/dua_dataset.json",'rb') as fl:
dua_data = json.load(fl)
new_dataset = {}
for data in dua_data:
id = data["id"]
text = data["full_text"]
data = {text : id}
new_dataset.update(data)
with open("json/dua_keys.json",'w') as fl:
json.dump(new_dataset,fl)
"""