Spaces:
Runtime error
Runtime error
| import json | |
| import os | |
| import threading | |
| import time | |
| from pathlib import Path | |
| from typing import Any, Dict, List | |
| import faiss | |
| import numpy as np | |
| from sentence_transformers import SentenceTransformer | |
# Runtime configuration, read once from the environment at import time.

# Identifier handed to SentenceTransformer for embedding queries.
EMBED_MODEL = os.environ.get(
    "EMBED_MODEL", "sentence-transformers/all-MiniLM-L6-v2"
)

# Root working directory; index artifacts live under <WORK_DIR>/out/artifacts.
WORK_DIR = Path(os.environ.get("WORK_DIR", "/data/work"))
ART_DIR = WORK_DIR.joinpath("out", "artifacts")

# Minimum number of seconds between two on-disk freshness checks of the index.
RELOAD_POLL_SECONDS = int(os.environ.get("RELOAD_POLL_SECONDS", "30"))
class LocalIndexRuntime:
    """In-process FAISS index plus its metadata, loaded from ``ART_DIR``.

    Queries are embedded with the sentence-transformers model named by
    ``EMBED_MODEL`` and looked up in ``faiss.index``; position ``i`` returned
    by the index is resolved through entry ``i`` of ``meta.json``.  All state
    changes happen under a re-entrant lock.
    """

    def __init__(self) -> None:
        """Load the embedding model; the index itself is loaded lazily."""
        self.model = SentenceTransformer(EMBED_MODEL)
        self.index: faiss.Index | None = None
        # NOTE(review): assumed to be a JSON array of objects - confirm
        # against whatever writes meta.json.
        self.meta: List[Dict[str, Any]] | None = None
        # mtime of faiss.index as observed by the last successful load.
        self.last_mtime: float | None = None
        # Wall-clock time of the last load or reload poll.
        self.last_check = 0.0
        self._lock = threading.RLock()

    def _paths(self) -> tuple[Path, Path]:
        """Return ``(index_path, meta_path)`` inside ``ART_DIR``."""
        return (ART_DIR / "faiss.index", ART_DIR / "meta.json")

    def exists(self) -> bool:
        """Return True when both the index file and the metadata file exist."""
        idx_path, meta_path = self._paths()
        return idx_path.exists() and meta_path.exists()

    def load(self) -> None:
        """Read the index and metadata from disk and publish them together.

        Raises:
            OSError: if either artifact cannot be read.
            json.JSONDecodeError: if ``meta.json`` is not valid JSON.
            Exception: whatever ``faiss.read_index`` raises on a bad file.
        """
        with self._lock:
            idx_path, meta_path = self._paths()
            # Stat *before* reading: if the file is replaced while we read
            # it, the recorded mtime is the older one and the next poll
            # still notices the newer file.
            mtime = idx_path.stat().st_mtime
            new_index = faiss.read_index(str(idx_path))
            new_meta = json.loads(meta_path.read_text(encoding="utf-8"))
            # Publish only once both artifacts parsed, so a failed load
            # leaves the previous (index, meta) pair intact and consistent.
            self.index = new_index
            self.meta = new_meta
            self.last_mtime = mtime
            self.last_check = time.time()
            print(f"[INDEX] loaded local index from {idx_path}")

    def ensure_loaded(self) -> None:
        """Load the index on first use.

        An index already held in memory is kept even if the artifact files
        are currently absent, mirroring ``maybe_reload`` which also tolerates
        a missing file.

        Raises:
            RuntimeError: if nothing is loaded and the artifacts are missing.
        """
        with self._lock:
            if self.index is not None and self.meta is not None:
                return
            if not self.exists():
                raise RuntimeError(f"Indice nao existe em {ART_DIR}. Rode reindex primeiro.")
            self.load()

    def maybe_reload(self) -> None:
        """Reload the index if its file changed since the last load.

        The on-disk check runs at most once every ``RELOAD_POLL_SECONDS``.
        Nothing happens when either artifact is missing; errors raised by
        ``load`` propagate to the caller.
        """
        with self._lock:
            now = time.time()
            if now - self.last_check < RELOAD_POLL_SECONDS:
                return
            self.last_check = now
            idx_path, meta_path = self._paths()
            try:
                mtime = idx_path.stat().st_mtime
            except FileNotFoundError:
                # Index absent (or removed between polls): keep what we have.
                return
            if not meta_path.exists():
                # Metadata not written yet; retry on the next poll instead of
                # failing inside load().
                return
            if self.last_mtime is None or mtime > self.last_mtime:
                print("[INDEX] detected updated index; reloading...")
                self.load()

    def search(self, query: str, k: int = 4) -> List[Dict[str, Any]]:
        """Return up to ``k`` metadata entries nearest to ``query``.

        Each result is a shallow copy of the matching metadata entry with an
        added ``"score"`` key holding the raw value returned by the index
        (NOTE(review): whether higher or lower is better depends on the index
        type, which is not visible here).

        Raises:
            RuntimeError: if no index is loaded and none exists on disk.
        """
        self.ensure_loaded()
        if k <= 0:
            # Nothing requested; avoid handing a non-positive k to FAISS.
            return []
        with self._lock:
            index, meta = self.index, self.meta
            # Explicit check instead of ``assert``, which is stripped by -O.
            if index is None or meta is None:
                raise RuntimeError("index is not loaded")
            qv = self.model.encode([query], normalize_embeddings=True)
            qv = np.asarray(qv, dtype="float32")
            scores, idxs = index.search(qv, k)
            out: List[Dict[str, Any]] = []
            for score, i in zip(scores[0], idxs[0]):
                if i < 0:
                    # The original skipped -1 entries (no hit for this slot).
                    continue
                item = dict(meta[int(i)])
                item["score"] = float(score)
                out.append(item)
            return out