Spaces:
Sleeping
Sleeping
| import numpy as np | |
| from typing import List, Tuple | |
| from .similarity import cosine_similarity | |
| from .vectorizer import Vectorizer | |
| import logging | |
| # Configure logging | |
| logging.basicConfig(level=logging.INFO) | |
| logger = logging.getLogger(__name__) | |
class PromptSearchEngine:
    """Find the stored prompts most similar to a free-text query.

    Two search back-ends are available: a Pinecone index query (reached
    through ``self.vectorizer.index``) and a local ranking that scores the
    query against pre-computed corpus vectors with ``cosine_similarity``.
    """

    def __init__(self):
        """Create the vectorizer and pre-compute a vector for every prompt."""
        self.vectorizer = Vectorizer(init_pinecone=False)
        # NOTE(review): this sets a private flag on the vectorizer, presumably
        # to mark its data as already loaded -- confirm against Vectorizer.
        self.vectorizer._data_loaded = True
        self.prompts = self.vectorizer.prompts
        self.corpus_vectors = self.vectorizer.transform(self.prompts)
        self.index_name = self.vectorizer.pinecone_index_name

    def most_similar(self, query: str, n: int = 5, use_pinecone: bool = True) -> List[Tuple[float, str]]:
        """Return up to ``n`` ``(score, prompt_text)`` pairs for ``query``.

        Args:
            query: Text to search for.
            n: Maximum number of results. Values ``<= 0`` yield an empty list.
            use_pinecone: When true, query the Pinecone index first and fall
                back to the local cosine-similarity ranking if that fails.
                When false, use the local ranking directly.

        Returns:
            A list of ``(score, text)`` tuples. The local ranking orders them
            from highest to lowest score; Pinecone results keep the order in
            which the index returned them.
        """
        # A non-positive n must not reach the slice ``[-n:]``: ``[-0:]`` is the
        # whole array, so n == 0 used to return every prompt in the corpus.
        if n <= 0:
            return []

        logger.info("Encoding query: %s", query)
        query_vector = self.vectorizer.transform([query])[0]
        logger.info("Encoded query vector: %s", query_vector)

        if not use_pinecone:
            logger.info("I'm cosine similarity search because the use_pinecone is: %s", use_pinecone)
            logger.info("Using cosine similarity for search")
            return self._cosine_search(query_vector, n)

        logger.info("I'm doing pinecone vector search because the use_pinecone is: %s", use_pinecone)
        try:
            # Convert numpy array to list of native Python floats
            query_vector_list = query_vector.tolist()
            search_result = self.vectorizer.index.query(
                vector=query_vector_list,
                top_k=n,
                include_metadata=True,
            )
            logger.info("Search result: %s", search_result)
            # Keep only matches that carry the prompt text in their metadata.
            return [
                (match['score'], match['metadata']['text'])
                for match in search_result['matches']
                if 'text' in match['metadata']
            ]
        except Exception as e:
            # Deliberately broad: any Pinecone problem (missing index, network
            # error, unexpected response shape) degrades to the local search
            # instead of failing the request. The traceback is still logged.
            logger.exception("Pinecone query failed: %s", e)
            logger.info("Falling back to cosine similarity search.")
            return self._cosine_search(query_vector, n)

    def _cosine_search(self, query_vector, n: int) -> List[Tuple[float, str]]:
        """Rank the corpus against ``query_vector`` locally; best match first."""
        similarities = cosine_similarity(query_vector, self.corpus_vectors)
        # argsort is ascending: take the last n indices and reverse them.
        top_n_indices = np.argsort(similarities)[-n:][::-1]
        return [(float(similarities[i]), self.prompts[i]) for i in top_n_indices]