Spaces:
Sleeping
Sleeping
| from typing import List, Tuple | |
| import numpy as np | |
| import cohere | |
| from settings import COHERE_API_KEY, COHERE_EMBED_MODEL | |
class RAGIndex:
    """In-memory vector index over text chunks with cosine-similarity search.

    Chunks are embedded through the Cohere client when an API key is
    configured. Without a key, a deterministic pseudo-random fallback is used
    so the rest of the pipeline keeps running: identical texts map to
    identical vectors, but the vectors carry no semantic meaning.
    """

    # Width of the vectors produced by the keyless fallback embedder.
    FALLBACK_DIM = 384

    def __init__(self):
        """Create an empty index; the Cohere client is built only if a key is set."""
        self.client = cohere.Client(api_key=COHERE_API_KEY) if COHERE_API_KEY else None
        self.texts: List[str] = []
        # Row i is the embedding of self.texts[i]; stays None until the first add().
        self.vecs: np.ndarray | None = None

    def _fallback_embed(self, texts: List[str]) -> np.ndarray:
        """Return one reproducible pseudo-random float32 vector per text.

        Each vector is drawn from a generator seeded with a hash of the text,
        so embedding the same text twice yields the same vector. This lets an
        exact-match query find its chunk even without a real embedding model.
        """
        import hashlib  # local import: only needed on the keyless path

        rows = []
        for text in texts:
            digest = hashlib.sha256(str(text).encode("utf-8", errors="replace")).digest()
            seed = int.from_bytes(digest[:8], "little")
            rows.append(np.random.default_rng(seed).normal(size=self.FALLBACK_DIM))
        return np.asarray(rows, dtype="float32")

    def _embed(self, texts: List[str]) -> np.ndarray:
        """Embed ``texts`` and return a float32 array with one row per text.

        An empty input yields an empty ``(0, FALLBACK_DIM)`` array. When no
        Cohere client is available the deterministic fallback is used instead
        of calling the API.
        """
        if not texts:
            return np.zeros((0, self.FALLBACK_DIM), dtype="float32")
        if not self.client:
            # Fallback: avoids a crash when no API key is configured (not ideal).
            return self._fallback_embed(texts)
        resp = self.client.embed(texts=texts, model=COHERE_EMBED_MODEL)
        # Read `embeddings`, falling back to `data` when it is missing or empty.
        raw = getattr(resp, "embeddings", []) or getattr(resp, "data", [])
        return np.array(raw, dtype="float32")

    def add(self, chunks: List[str]):
        """Embed ``chunks`` and append them to the index.

        Does nothing for an empty list.

        Raises:
            ValueError: if the embedder did not return exactly one vector per
                chunk; the index is left unchanged in that case.
        """
        if not chunks:
            return
        new_vecs = self._embed(chunks)
        # Guard against an empty or malformed provider response, which would
        # otherwise silently misalign texts and vectors.
        if new_vecs.ndim != 2 or new_vecs.shape[0] != len(chunks):
            raise ValueError(
                f"expected {len(chunks)} embedding vectors, "
                f"got array of shape {new_vecs.shape}"
            )
        if self.vecs is None:
            self.vecs = new_vecs
            self.texts = list(chunks)
        else:
            # vstack runs first so a dimension mismatch leaves texts untouched.
            self.vecs = np.vstack([self.vecs, new_vecs])
            self.texts.extend(chunks)

    def retrieve(self, query: str, k: int = 6) -> List[Tuple[str, float]]:
        """Return up to ``k`` ``(text, cosine_similarity)`` pairs, best first.

        Returns an empty list when the index is empty or ``k`` is not
        positive. Vectors with zero norm score 0.0 rather than NaN.
        """
        if not self.texts or self.vecs is None or k <= 0:
            return []
        qv = self._embed([query])[0]
        doc_norms = np.linalg.norm(self.vecs, axis=1)
        # Clamp the whole denominator so a zero-norm row or query cannot
        # produce a division by zero.
        denom = np.maximum(doc_norms * np.linalg.norm(qv), 1e-9)
        sims = (self.vecs @ qv) / denom
        best = np.argsort(-sims)[:k]
        return [(self.texts[i], float(sims[i])) for i in best]