Spaces:
Build error
Build error
| from sentence_transformers import SentenceTransformer, util | |
| from datasets import load_dataset | |
| import pandas as pd | |
| import numpy as np | |
| import faiss | |
class Robot:
    """FAQ retriever built on sentence embeddings and a FAISS index.

    The FAQ dataset is loaded into ``self.df_FAQ`` and its ``respuestas``
    column is embedded with a SentenceTransformer model. The FAISS index over
    those embeddings is cached on disk so later instantiations can skip the
    encoding step.
    """

    def __init__(self, index_name):
        """Load the model and the dataset, then load or build the index.

        Args:
            index_name: Path of the FAISS index file used as an on-disk cache.
        """
        self.modelo = SentenceTransformer("paraphrase-multilingual-MiniLM-L12-v2")
        # FAQ dataset pulled from the Hugging Face Hub.
        dataset = load_dataset('Waflon/FAQ', split="train")
        self.df_FAQ = pd.DataFrame(dataset)

        try:
            # If a cached index exists, skip the (expensive) encoding.
            index = faiss.read_index(index_name)
        except Exception:
            # Missing or unreadable cache: fall through and rebuild it.
            index = None

        # A cache built from a different dataset revision would map ids to
        # the wrong rows, so it is only trusted when the sizes agree.
        if index is None or index.ntotal != len(self.df_FAQ):
            index = self._construir_indice(index_name)
        self.index = index

    def _construir_indice(self, index_name):
        """Embed every FAQ answer, index the vectors and cache them on disk."""
        textos = self.df_FAQ['respuestas'].tolist()
        vectors = self.modelo.encode(textos)
        # The index dimensionality is the embedding size of the model.
        index = faiss.IndexFlatL2(vectors.shape[1])
        # On unit-length vectors, L2 distance ranks like cosine similarity.
        faiss.normalize_L2(vectors)
        index.add(vectors)
        faiss.write_index(index, index_name)
        return index

    def preguntar(self, pregunta: str, k_resultados=2) -> list:
        """Return the FAQ answers closest to ``pregunta``.

        Args:
            pregunta: Question text to search for.
            k_resultados: Maximum number of answers to return.

        Returns:
            A list with the values of the ``respuestas`` column for the
            nearest entries, ordered from nearest to farthest. The list is
            empty when ``k_resultados`` is not positive or the index holds no
            vectors.
        """
        # Never ask FAISS for more neighbours than the index contains.
        k = min(k_resultados, self.index.ntotal)
        if k <= 0:
            return []

        vector_busqueda = self.modelo.encode(pregunta)
        _vector = np.array([vector_busqueda])  # search expects a 2-D batch
        faiss.normalize_L2(_vector)
        _, ann = self.index.search(_vector, k=k)  # distances are not needed

        # The ids returned by the search are the positions at which the
        # vectors were added, i.e. the row positions in ``df_FAQ``, so the
        # answers are fetched positionally from the dataset itself.
        respuestas = self.df_FAQ['respuestas']
        total = len(respuestas)
        return [respuestas.iloc[i] for i in ann[0] if 0 <= i < total]