Spaces:
Sleeping
Sleeping
| import google.generativeai as genai | |
| import numpy as np | |
| import os | |
class InMemoryVectorStore:
    """In-memory vector index of document chunks, grouped by filename.

    Chunks are embedded through ``genai.embed_content`` and kept in a plain
    dict, so the index lives only as long as the process does. Lookups rank
    the chunks of one file by cosine similarity against the query embedding.
    """

    def __init__(self):
        # Maps filename -> list of {"text", "vector", "metadata"} entries.
        self.store = {}
        self.model_name = "models/text-embedding-004"

    @staticmethod
    def _empty_result():
        """Return a fresh empty result so callers never share mutable lists."""
        return {"documents": [[]], "metadatas": [[]], "distances": [[]]}

    def get_embedding(self, text, task_type="retrieval_document"):
        """Return the embedding of *text*, or ``[]`` when the request fails.

        Args:
            text: Content to embed.
            task_type: Task type forwarded to ``genai.embed_content``. The
                default is used for indexed chunks; ``search_similar`` passes
                ``"retrieval_query"`` for queries.

        Returns:
            The value stored under ``'embedding'`` in the API response, or an
            empty list if the call raised.
        """
        try:
            result = genai.embed_content(
                model=self.model_name,
                content=text,
                task_type=task_type,
            )
            return result['embedding']
        except Exception as e:
            # Best-effort boundary around the remote call: report and let the
            # caller skip this text instead of aborting the whole operation.
            print(f"Error getting embedding: {e}")
            return []

    def add_contract(self, filename: str, chunks: list):
        """Embed *chunks* and (re)index them under *filename*.

        Each chunk must be a mapping with ``"text"`` and ``"page"`` keys.
        Chunks whose embedding could not be obtained are skipped. Any entries
        previously stored for *filename* are replaced.
        """
        print(f"Indexing {filename} using Google Embeddings...")
        indexed = []
        for chunk in chunks:
            text = chunk["text"]
            vector = self.get_embedding(text)
            if vector:
                indexed.append({
                    "text": text,
                    "vector": np.array(vector),
                    "metadata": {"page": chunk["page"]},
                })
        # Assign once at the end so a malformed chunk cannot leave a
        # half-built index behind for this filename.
        self.store[filename] = indexed
        print(f"Indexed {len(self.store[filename])} chunks for {filename}")

    def search_similar(self, query: str, filename: str, n_results: int = 3):
        """Return the chunks of *filename* most similar to *query*.

        Args:
            query: Text to search for.
            filename: Key of a previously indexed file.
            n_results: Maximum number of chunks to return; values <= 0 yield
                an empty result.

        Returns:
            A dict with ``"documents"``, ``"metadatas"`` and ``"distances"``
            keys, each holding a single inner list ordered from most to least
            similar. Distances are ``1 - cosine_similarity`` as plain floats.
            All inner lists are empty when the file is unknown or has no
            indexed chunks, or when the query cannot be embedded.
        """
        entries = self.store.get(filename)
        # Checked before embedding so an empty index costs no API call.
        if not entries or n_results <= 0:
            return self._empty_result()

        query_emb = self.get_embedding(query, task_type="retrieval_query")
        if not query_emb:
            return self._empty_result()

        query_vec = np.asarray(query_emb, dtype=float)
        doc_matrix = np.stack([entry["vector"] for entry in entries]).astype(float)

        dots = doc_matrix @ query_vec
        norms = np.linalg.norm(doc_matrix, axis=1) * np.linalg.norm(query_vec)
        # A zero-norm vector has no direction; score it 0 instead of letting
        # a 0/0 division produce NaN, which would corrupt the ranking.
        similarities = np.divide(
            dots, norms, out=np.zeros_like(dots), where=norms > 0
        )

        # Stable sort keeps insertion order among equally similar chunks.
        ranked = np.argsort(-similarities, kind="stable")[:n_results]
        top = [(entries[i], float(similarities[i])) for i in ranked]

        return {
            "documents": [[entry["text"] for entry, _ in top]],
            "metadatas": [[entry["metadata"] for entry, _ in top]],
            "distances": [[1.0 - score for _, score in top]],
        }
| # Global instance (singleton): one InMemoryVectorStore created at import time. | |
| vector_db = InMemoryVectorStore() | |