finsmart_bot / src /embedding.py
WillyCodesInit's picture
Create embedding.py
7318709 verified
raw
history blame contribute delete
691 Bytes
# embedding.py
import os
import numpy as np
import pandas as pd
import faiss
from sentence_transformers import SentenceTransformer
# --- Load data ---
def load_data(data_path=None):
    """Load the question/answer training pairs from a CSV file.

    Args:
        data_path: Optional path to the CSV file. When omitted (or ``None``),
            ``train_data.csv`` located in the same directory as this module
            is read.

    Returns:
        A ``(questions, answers)`` tuple of two equally long lists taken from
        the ``question`` and ``answer`` columns, in file order.

    Raises:
        FileNotFoundError: If the CSV file does not exist.
        KeyError: If the ``question`` or ``answer`` column is missing.
    """
    if data_path is None:
        # Resolve relative to this module rather than the current working
        # directory, so the default works wherever the process is launched.
        data_path = os.path.join(os.path.dirname(__file__), 'train_data.csv')
    df = pd.read_csv(data_path)
    return df['question'].tolist(), df['answer'].tolist()
# --- Embedding model and FAISS index ---
def setup_embeddings(
    answers,
    model_name='sentence-transformers/paraphrase-MiniLM-L6-v2',
):
    """Embed the answers and build a FAISS L2 index over the embeddings.

    Args:
        answers: Sequence of answer strings to embed. Must not be empty.
        model_name: Name or path of the SentenceTransformer model to load.

    Returns:
        A ``(embedder, index)`` tuple: the loaded ``SentenceTransformer`` and
        a ``faiss.IndexFlatL2`` to which the answer embeddings were added in
        input order.

    Raises:
        ValueError: If ``answers`` is empty.
    """
    # Fail fast, before the (expensive) model load: with no inputs there is
    # no 2-D embedding matrix from which to read the vector dimension.
    if len(answers) == 0:
        raise ValueError('answers must contain at least one entry')

    embedder = SentenceTransformer(model_name)
    answer_embeddings = embedder.encode(answers, show_progress_bar=True)

    # FAISS expects a C-contiguous float32 matrix; this is a no-op (no copy)
    # when the encoder already returns that layout.
    vectors = np.ascontiguousarray(answer_embeddings, dtype=np.float32)

    index = faiss.IndexFlatL2(vectors.shape[1])
    index.add(vectors)
    return embedder, index