Spaces:
Sleeping
Sleeping
File size: 1,101 Bytes
"""Embedding and vector search utilities."""
from openai import OpenAI
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from dotenv import load_dotenv
import os
# Populate the process environment from a .env file, if one is present
load_dotenv()
# Module-wide OpenAI client, keyed by the OPENAI_API_KEY environment variable
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))
def get_embeddings(texts, model="text-embedding-3-small"):
    """Convert text to embedding vectors.

    Args:
        texts: A single string or a list of strings to embed. A bare string
            is treated as a one-element batch.
        model: Name of the embedding model passed to the embeddings endpoint.

    Returns:
        A NumPy array built from the ``embedding`` field of every item in
        the response, one row per item. An empty list or tuple yields an
        empty array of shape ``(0, 0)`` without contacting the API.
    """
    if isinstance(texts, str):
        texts = [texts]
    # Nothing to embed: skip the network round-trip entirely. Only concrete
    # sequences are checked so other iterables are passed through untouched.
    if isinstance(texts, (list, tuple)) and not texts:
        return np.empty((0, 0))
    response = client.embeddings.create(input=texts, model=model)
    return np.array([item.embedding for item in response.data])
def vector_search(query, chunks, chunk_embeddings, top_k=3):
    """Find the most similar chunks to the query.

    Args:
        query: Text to search for; it is embedded via ``get_embeddings``.
        chunks: Candidate chunks, indexable by position.
        chunk_embeddings: Embeddings for ``chunks``; the similarity computed
            for row ``i`` is paired with ``chunks[i]``.
        top_k: Maximum number of results to return. ``None`` returns every
            chunk; zero or a negative value returns no results.

    Returns:
        A list of dicts with keys ``'chunk'`` and ``'similarity'``, ordered
        from most to least similar. The list is empty when there are no
        chunks or ``top_k`` is not positive.
    """
    # A negative top_k would otherwise slice off the *last* k results, and an
    # empty corpus would spend an embedding call before failing downstream.
    # len() is used instead of truthiness in case chunks is a NumPy array.
    if (top_k is not None and top_k <= 0) or len(chunks) == 0:
        return []
    query_embedding = get_embeddings(query)
    # cosine_similarity returns a (1, n_chunks) matrix; take its only row.
    similarities = cosine_similarity(query_embedding, chunk_embeddings)[0]
    # argsort is ascending, so reverse it to rank the best matches first.
    top_indices = similarities.argsort()[::-1][:top_k]
    return [
        {'chunk': chunks[idx], 'similarity': similarities[idx]}
        for idx in top_indices
    ]
|