"""Embedding and vector search utilities.""" from openai import OpenAI import numpy as np from sklearn.metrics.pairwise import cosine_similarity from dotenv import load_dotenv import os # Load environment variables load_dotenv() # Initialize OpenAI client with API key from environment client = OpenAI(api_key=os.getenv("OPENAI_API_KEY")) def get_embeddings(texts, model="text-embedding-3-small"): """Convert text to embedding vectors.""" if isinstance(texts, str): texts = [texts] response = client.embeddings.create(input=texts, model=model) return np.array([item.embedding for item in response.data]) def vector_search(query, chunks, chunk_embeddings, top_k=3): """Find the most similar chunks to the query.""" query_embedding = get_embeddings(query) similarities = cosine_similarity(query_embedding, chunk_embeddings)[0] top_indices = similarities.argsort()[::-1][:top_k] results = [] for idx in top_indices: results.append({ 'chunk': chunks[idx], 'similarity': similarities[idx] }) return results