# embedding.py
"""Load question/answer training data and index answer embeddings with FAISS."""

import os

import faiss
import numpy as np
import pandas as pd
from sentence_transformers import SentenceTransformer


# --- Load data ---
def load_data():
    """Read ``train_data.csv`` from this module's directory.

    Returns:
        A ``(questions, answers)`` tuple of lists taken from the CSV's
        ``question`` and ``answer`` columns, in file order.

    Raises:
        FileNotFoundError: If ``train_data.csv`` is not next to this module.
        KeyError: If either expected column is missing from the CSV.
    """
    # Resolve relative to this file so the lookup does not depend on the
    # caller's current working directory.
    data_path = os.path.join(os.path.dirname(__file__), 'train_data.csv')
    df = pd.read_csv(data_path)
    return df['question'].tolist(), df['answer'].tolist()


# --- Embedding model and FAISS index ---
def setup_embeddings(answers):
    """Embed *answers* and build a flat L2 FAISS index over the vectors.

    Args:
        answers: Sequence of answer strings to embed. Row ``i`` of the index
            corresponds to ``answers[i]``.

    Returns:
        A ``(embedder, index)`` tuple: the loaded ``SentenceTransformer``
        model and a ``faiss.IndexFlatL2`` containing one vector per answer.

    Raises:
        ValueError: If *answers* is empty, since the index dimensionality
            is derived from the encoded vectors.
    """
    # len() rather than truthiness so array-like inputs are handled too.
    if len(answers) == 0:
        raise ValueError("answers must contain at least one item to build an index")

    embedder = SentenceTransformer('sentence-transformers/paraphrase-MiniLM-L6-v2')
    answer_embeddings = embedder.encode(answers, show_progress_bar=True)

    # FAISS only accepts C-contiguous float32 matrices; normalise explicitly
    # instead of relying on the encoder's output dtype and memory layout.
    vectors = np.ascontiguousarray(answer_embeddings, dtype=np.float32)

    index = faiss.IndexFlatL2(vectors.shape[1])
    index.add(vectors)
    return embedder, index