# Scraped from a HuggingFace Space (status banner: "Spaces: Sleeping").
"""Build and persist a FAISS inner-product index over dua question keys.

Reads the text -> id mapping from json/dua_keys.json, embeds each key with
the all-MiniLM-L6-v2 sentence-transformer, L2-normalizes the embeddings so
that inner product equals cosine similarity, and writes the resulting index
to ./model/dua_vector.faiss.
"""
from sentence_transformers import SentenceTransformer
import numpy as np
import faiss
import json

# Mapping of full dua text -> dua id; the keys are the texts we embed.
# NOTE(review): assumes the file is UTF-8 JSON — confirm against the generator below.
with open("json/dua_keys.json", encoding="utf-8") as fl:
    dua_data = json.load(fl)

# Insertion-ordered list of keys: FAISS row i corresponds to keys2[i].
keys2 = list(dua_data)

model = SentenceTransformer(
    "sentence-transformers/all-MiniLM-L6-v2",
    cache_folder="./all-MiniLM-L6-v2",
)
embeddings = model.encode(keys2)
dimension = embeddings.shape[1]

# Normalize each row to unit length so the inner-product index (IndexFlatIP)
# scores cosine similarity rather than raw dot product.
embeddings = embeddings / np.linalg.norm(embeddings, axis=1, keepdims=True)

Index = faiss.IndexFlatIP(dimension)
Index.add(embeddings)  # type: ignore
faiss.write_index(Index, "./model/dua_vector.faiss")
| """ | |
| USAGE | |
| def search(question,top_k=1): | |
| question_embeddings = model.encode([question]) | |
| question_embeddings = question_embeddings / np.linalg.norm(question_embeddings,axis=1,keepdims=True) | |
| distances,indices = Index.search(np.array(question_embeddings),top_k) | |
| results = [(keys2[i], distances[0][pos]) for pos,i in enumerate(indices[0])] | |
| return results | |
| question = input("Question: ") | |
| print(search(question)) | |
| """ | |
| """ | |
| # I used this to generate the new keys | |
| import json | |
| with open("json/dua_dataset.json",'rb') as fl: | |
| dua_data = json.load(fl) | |
| new_dataset = {} | |
| for data in dua_data: | |
| id = data["id"] | |
| text = data["full_text"] | |
| data = {text : id} | |
| new_dataset.update(data) | |
| with open("json/dua_keys.json",'w') as fl: | |
| json.dump(new_dataset,fl) | |
| """ | |