Spaces:
Sleeping
Sleeping
| # src/embed_service/cache_manager.py | |
| import os | |
| import json | |
| import numpy as np | |
# Directory that holds the cache files; a relative path, so it is resolved
# against the process's current working directory.
CACHE_DIR = "cache"

# JSON file with the per-file metadata.
META_PATH = CACHE_DIR + "/embed_meta.json"

# NumPy .npy file with the embedding matrix.
EMB_PATH = CACHE_DIR + "/embeddings.npy"
class CacheManager:
    """On-disk cache of per-file embedding vectors.

    ``meta`` maps a filename to ``{"hash": ..., "index": int}``, where
    ``index`` is the row of ``embeddings`` that holds the file's vector.
    The metadata is persisted as JSON at ``META_PATH`` and the matrix as a
    NumPy ``.npy`` file at ``EMB_PATH``.
    """

    def __init__(self, dim: int = 384):
        """Load the cache from disk, or start with an empty one.

        Args:
            dim: Column count of the empty placeholder matrix created when
                no saved embeddings exist. The placeholder is replaced by
                the first embedding added, so that embedding's own length
                determines the real dimension.
        """
        os.makedirs(CACHE_DIR, exist_ok=True)

        if os.path.exists(META_PATH):
            with open(META_PATH, "r", encoding="utf-8") as f:
                self.meta = json.load(f)
        else:
            self.meta = {}  # filename -> {"hash": ..., "index": int}

        if os.path.exists(EMB_PATH):
            self.embeddings = np.load(EMB_PATH)
        else:
            # Empty (0, dim) matrix; swapped out when the first row arrives.
            self.embeddings = np.zeros((0, dim), dtype="float32")

    def save(self) -> None:
        """Write the metadata and the embedding matrix to disk.

        Each file is written to a sibling ``.tmp`` path and then moved into
        place with ``os.replace``, so an interrupted write does not leave a
        truncated cache file behind.
        """
        meta_tmp = META_PATH + ".tmp"
        emb_tmp = EMB_PATH + ".tmp"

        with open(meta_tmp, "w", encoding="utf-8") as f:
            json.dump(self.meta, f, indent=2)
        # Pass an open file object: given a path, np.save would append
        # ".npy" to the temporary name.
        with open(emb_tmp, "wb") as f:
            np.save(f, self.embeddings)

        # Publish the matrix first. If the process stops between the two
        # replaces, the old metadata only references rows that still exist.
        os.replace(emb_tmp, EMB_PATH)
        os.replace(meta_tmp, META_PATH)

    def exists(self, filename: str, file_hash: str) -> bool:
        """Return True if ``filename`` is cached with exactly ``file_hash``."""
        entry = self.meta.get(filename)
        return entry is not None and entry["hash"] == file_hash

    def get_embedding(self, filename: str):
        """Return the cached embedding row for ``filename``.

        Raises:
            KeyError: If ``filename`` has no entry in the metadata.
        """
        idx = int(self.meta[filename]["index"])
        return self.embeddings[idx]

    def add_embedding(self, filename: str, file_hash: str, embedding) -> None:
        """Store ``embedding`` for ``filename`` and persist the cache.

        A filename that is already cached keeps its row: the vector is
        overwritten in place and only the hash changes, so stale rows do
        not accumulate. New filenames are appended as a new last row.

        Args:
            filename: Key under which the embedding is stored.
            file_hash: Content hash recorded alongside the embedding.
            embedding: Array-like vector; it is copied, converted to
                float32 and flattened to a single row.

        Raises:
            ValueError: If the cache already holds rows and ``embedding``
                has a different length. Nothing is modified in that case.
        """
        # np.array copies, so the cache never aliases the caller's buffer.
        row = np.array(embedding, dtype="float32").reshape(1, -1)

        n_rows = self.embeddings.shape[0]
        if n_rows and row.shape[1] != self.embeddings.shape[1]:
            raise ValueError(
                f"embedding has dimension {row.shape[1]}, "
                f"cache expects {self.embeddings.shape[1]}"
            )

        entry = self.meta.get(filename)
        idx = int(entry["index"]) if entry is not None else -1

        if 0 <= idx < n_rows:
            # Known file: reuse its row instead of orphaning it.
            self.embeddings[idx] = row[0]
        else:
            idx = n_rows
            if n_rows == 0:
                # Replace the empty placeholder, adopting this row's width.
                self.embeddings = row
            else:
                self.embeddings = np.vstack([self.embeddings, row])

        # Metadata is updated only after the matrix update has succeeded,
        # so a failure above cannot leave an index pointing at no row.
        self.meta[filename] = {"hash": file_hash, "index": idx}
        self.save()

    def all_embeddings(self):
        """Return ``(meta, embeddings)``: the metadata dict and the matrix."""
        return self.meta, self.embeddings