"""Build and persist a FAISS cosine-similarity index over dua key strings.

Loads the key-to-id mapping from ``json/dua_keys.json``, embeds every key
with a MiniLM sentence-transformer, L2-normalizes the embeddings (so that
inner product equals cosine similarity), and writes the FAISS index to
``./model/dua_vector.faiss``.
"""

import json
from pathlib import Path

import faiss
import numpy as np
from sentence_transformers import SentenceTransformer

# dua_keys.json maps full dua text -> dua id (see generator snippet below).
with open("json/dua_keys.json", encoding="utf-8") as fl:
    dua_data = json.load(fl)

# dict preserves insertion order, so key position == index row id.
keys2 = list(dua_data)

model = SentenceTransformer(
    "sentence-transformers/all-MiniLM-L6-v2",
    cache_folder="./all-MiniLM-L6-v2",
)

embeddings = model.encode(keys2)
dimension = embeddings.shape[1]

# Normalize each row so IndexFlatIP (inner product) ranks by cosine similarity.
embeddings = embeddings / np.linalg.norm(embeddings, axis=1, keepdims=True)

index = faiss.IndexFlatIP(dimension)
index.add(embeddings)  # type: ignore

# write_index fails with FileNotFoundError if the target directory is missing.
Path("model").mkdir(parents=True, exist_ok=True)
faiss.write_index(index, "./model/dua_vector.faiss")

"""
USAGE

def search(question, top_k=1):
    question_embeddings = model.encode([question])
    question_embeddings = question_embeddings / np.linalg.norm(
        question_embeddings, axis=1, keepdims=True
    )
    distances, indices = index.search(np.array(question_embeddings), top_k)
    results = [(keys2[i], distances[0][pos]) for pos, i in enumerate(indices[0])]
    return results

question = input("Question: ")
print(search(question))
"""

"""
# I used this to generate the new keys

import json

with open("json/dua_dataset.json", encoding="utf-8") as fl:
    dua_data = json.load(fl)

new_dataset = {}
for data in dua_data:
    new_dataset[data["full_text"]] = data["id"]

with open("json/dua_keys.json", 'w') as fl:
    json.dump(new_dataset, fl)
"""