Spaces:
Sleeping
Sleeping
| # embedding.py | |
| import os | |
| import numpy as np | |
| import pandas as pd | |
| import faiss | |
| from sentence_transformers import SentenceTransformer | |
| # --- Load data --- | |
def load_data():
    """Read the training CSV that sits next to this module.

    The file ``train_data.csv`` is looked up in the directory containing
    this source file and parsed with pandas.

    Returns:
        A ``(questions, answers)`` tuple of two lists, taken from the
        ``question`` and ``answer`` columns respectively, in file order.
    """
    module_dir = os.path.dirname(__file__)
    frame = pd.read_csv(os.path.join(module_dir, 'train_data.csv'))
    questions = frame['question'].tolist()
    answers = frame['answer'].tolist()
    return questions, answers
| # --- Embedding model and FAISS index --- | |
def setup_embeddings(answers):
    """Embed the answers and build a FAISS L2 index over them.

    Loads the ``sentence-transformers/paraphrase-MiniLM-L6-v2`` model,
    encodes every entry of ``answers`` (showing a progress bar), and adds
    the resulting vectors to a flat (exhaustive) L2 index whose
    dimensionality equals the embedding width.

    Args:
        answers: The texts to embed — presumably a list of strings as
            produced by ``load_data``; confirm against the caller.

    Returns:
        A ``(embedder, index)`` tuple: the loaded ``SentenceTransformer``
        and the populated ``faiss.IndexFlatL2``. Row ``i`` of the index
        corresponds to ``answers[i]``.
    """
    embedder = SentenceTransformer('sentence-transformers/paraphrase-MiniLM-L6-v2')
    answer_embeddings = embedder.encode(answers, show_progress_bar=True)
    # FAISS only accepts C-contiguous float32 matrices. Convert once here
    # (a no-op when the encoder already returns that layout) rather than
    # making an unconditional copy that does not enforce the dtype.
    vectors = np.ascontiguousarray(answer_embeddings, dtype=np.float32)
    index = faiss.IndexFlatL2(vectors.shape[1])
    index.add(vectors)
    return embedder, index