Spaces:
Sleeping
Sleeping
File size: 691 Bytes
# embedding.py
import os
import numpy as np
import pandas as pd
import faiss
from sentence_transformers import SentenceTransformer
# --- Load data ---
def load_data():
    """Read the question/answer pairs stored in ``train_data.csv``.

    The CSV file is looked up in the directory that contains this module.

    Returns:
        A ``(questions, answers)`` tuple of two lists, taken from the
        ``question`` and ``answer`` columns of the file respectively.
    """
    module_dir = os.path.dirname(__file__)
    frame = pd.read_csv(os.path.join(module_dir, 'train_data.csv'))
    questions = frame['question'].tolist()
    answers = frame['answer'].tolist()
    return questions, answers
# --- Embedding model and FAISS index ---
def setup_embeddings(answers):
    """Encode *answers* and load the resulting vectors into a FAISS index.

    Args:
        answers: The answer texts handed to the model's ``encode`` call.

    Returns:
        An ``(embedder, index)`` tuple: the loaded ``SentenceTransformer``
        model and a ``faiss.IndexFlatL2`` to which the encoded answers
        have been added.
    """
    model = SentenceTransformer('sentence-transformers/paraphrase-MiniLM-L6-v2')
    vectors = model.encode(answers, show_progress_bar=True)

    # The flat L2 index is sized from the second axis of the embeddings.
    dim = vectors.shape[1]
    l2_index = faiss.IndexFlatL2(dim)
    l2_index.add(np.array(vectors))
    return model, l2_index