Spaces:
Runtime error
Runtime error
| import numpy as np | |
| from typing import List, Tuple | |
| from aimakerspace.openai_utils.embedding import EmbeddingModel | |
class VectorDatabase:
    """In-memory store of texts and their embeddings with cosine-similarity search.

    Embeddings are produced by an ``EmbeddingModel`` instance created in
    ``__init__``. ``texts`` and ``embeddings`` are parallel sequences: the
    embedding at position ``i`` belongs to the text at position ``i``.
    """

    def __init__(self) -> None:
        """Create an empty database and its embedding model.

        Raises:
            Exception: whatever ``EmbeddingModel()`` raises is logged to
                stdout and re-raised unchanged.
        """
        print("Initializing VectorDatabase...")  # Debug log
        try:
            self.embedding_model = EmbeddingModel()
            print("EmbeddingModel initialized")  # Debug log
            self.embeddings = []
            self.texts = []
        except Exception as e:
            print(f"Error initializing VectorDatabase: {str(e)}")  # Debug log
            raise

    async def abuild_from_list(self, texts: List[str]):
        """Embed ``texts`` and make them the contents of the database.

        Any previously stored texts and embeddings are replaced. The stored
        state is only updated after the embeddings were generated
        successfully, so a failure leaves the previous contents intact.

        Args:
            texts: The texts to index.

        Returns:
            This ``VectorDatabase`` instance, to allow chaining.

        Raises:
            ValueError: if the embedding model returns a different number of
                embeddings than there are texts.
            Exception: any error raised by the embedding model is logged to
                stdout and re-raised unchanged.
        """
        try:
            print(f"Building vector database from {len(texts)} texts")  # Debug log
            print("Generating embeddings...")  # Debug log
            embeddings = await self.embedding_model.async_get_embeddings(texts)
            print(f"Generated {len(embeddings)} embeddings")  # Debug log
            if len(embeddings) != len(texts):
                raise ValueError(
                    f"Expected {len(texts)} embeddings, got {len(embeddings)}"
                )
            # Commit both sequences together so they can never get out of
            # step; copy the texts so later caller-side mutation of the
            # input list cannot desynchronise them from the embeddings.
            self.texts = list(texts)
            self.embeddings = embeddings
            return self
        except Exception as e:
            print(f"Error building vector database: {str(e)}")  # Debug log
            raise

    def _cosine_similarity(self, a: List[float], b: List[float]) -> float:
        """Return the cosine similarity of vectors ``a`` and ``b``.

        If either vector has zero norm the similarity is undefined; ``0.0``
        is returned in that case instead of dividing by zero (which would
        yield NaN and make the ranking sort unreliable).
        """
        vec_a = np.asarray(a, dtype=float)
        vec_b = np.asarray(b, dtype=float)
        denominator = np.linalg.norm(vec_a) * np.linalg.norm(vec_b)
        if denominator == 0:
            return 0.0
        return float(np.dot(vec_a, vec_b) / denominator)

    async def search_by_text(self, query: str, k: int = 4) -> List[Tuple[str, float]]:
        """Return the ``k`` stored texts most similar to ``query``.

        Args:
            query: The text to search for.
            k: Maximum number of results. Values ``<= 0`` yield an empty
                list (a negative ``k`` would otherwise slice from the end).

        Returns:
            ``(text, similarity)`` tuples ordered by descending cosine
            similarity; at most ``k`` entries.

        Raises:
            Exception: any error raised while embedding the query or scoring
                is logged to stdout and re-raised unchanged.
        """
        if k <= 0:
            return []
        try:
            print(f"Searching for query: {query}")  # Debug log
            # NOTE(review): abuild_from_list uses `async_get_embeddings`
            # while this uses `embed_query`; confirm that EmbeddingModel
            # really exposes an awaitable `embed_query`.
            query_embedding = await self.embedding_model.embed_query(query)
            print("Generated query embedding")  # Debug log
            # Score every stored text against the query.
            similarities = [
                (text, self._cosine_similarity(query_embedding, embedding))
                for text, embedding in zip(self.texts, self.embeddings)
            ]
            # Highest similarity first; the sort is stable, so ties keep
            # their insertion order.
            similarities.sort(key=lambda pair: pair[1], reverse=True)
            print(f"Found {len(similarities)} matches")  # Debug log
            return similarities[:k]
        except Exception as e:
            print(f"Error in search_by_text: {str(e)}")  # Debug log
            raise