Spaces:
Sleeping
Sleeping
File size: 1,665 Bytes
d0abef8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 |
# src/embed_service/cache_manager.py
import os
import json
import numpy as np
# Directory (relative path) that holds every cache artifact.
CACHE_DIR = "cache"
# JSON file with the per-filename metadata.
META_PATH = "/".join((CACHE_DIR, "embed_meta.json"))
# NumPy .npy file with the stacked embedding rows.
EMB_PATH = "/".join((CACHE_DIR, "embeddings.npy"))
class CacheManager:
    """Disk-backed cache of embedding vectors keyed by filename.

    Two files are kept in the cache directory:

    * a JSON metadata file mapping ``filename`` to
      ``{"hash": <file hash>, "index": <row number>}``;
    * a ``.npy`` file holding a 2-D float32 matrix with one row per embedding.

    Attributes:
        meta: the in-memory metadata dict described above.
        embeddings: the in-memory embedding matrix.
    """

    # File names used when a custom cache directory is supplied.
    _META_FILENAME = "embed_meta.json"
    _EMB_FILENAME = "embeddings.npy"

    def __init__(self, cache_dir=None):
        """Load the cache from disk, creating the cache directory if needed.

        Args:
            cache_dir: optional directory to store the cache in. When omitted,
                the module-level ``CACHE_DIR`` / ``META_PATH`` / ``EMB_PATH``
                locations are used.
        """
        if cache_dir is None:
            cache_dir, meta_path, emb_path = CACHE_DIR, META_PATH, EMB_PATH
        else:
            meta_path = os.path.join(cache_dir, self._META_FILENAME)
            emb_path = os.path.join(cache_dir, self._EMB_FILENAME)
        self._meta_path = meta_path
        self._emb_path = emb_path
        os.makedirs(cache_dir, exist_ok=True)

        if os.path.exists(meta_path):
            with open(meta_path, "r", encoding="utf-8") as f:
                self.meta = json.load(f)
        else:
            self.meta = {}  # filename -> {"hash":..., "index": int}

        if os.path.exists(emb_path):
            self.embeddings = np.load(emb_path)
        else:
            # Empty (0, dim) placeholder; the first stored embedding replaces
            # it, so the real dimensionality comes from the data.
            self.embeddings = np.zeros((0, 384), dtype="float32")

        # Drop metadata entries that point outside the matrix (e.g. the
        # embeddings file was deleted or is older than the metadata file).
        # Otherwise exists() would report a hit that get_embedding() cannot
        # serve.
        n_rows = len(self.embeddings)
        self.meta = {
            name: entry
            for name, entry in self.meta.items()
            if 0 <= int(entry["index"]) < n_rows
        }

    def save(self):
        """Persist the embedding matrix and the metadata to disk.

        Each file is written to a temporary sibling and then moved into place
        with ``os.replace``, so a failure mid-write does not leave a truncated
        file behind. The embeddings are written first so that the metadata on
        disk never refers to rows that have not been stored yet.
        """
        emb_tmp = self._emb_path + ".tmp"
        # Passing a file object stops np.save from appending ".npy" to the
        # temporary name.
        with open(emb_tmp, "wb") as f:
            np.save(f, self.embeddings)
        os.replace(emb_tmp, self._emb_path)

        meta_tmp = self._meta_path + ".tmp"
        with open(meta_tmp, "w", encoding="utf-8") as f:
            json.dump(self.meta, f, indent=2)
        os.replace(meta_tmp, self._meta_path)

    def exists(self, filename: str, file_hash: str) -> bool:
        """Return True if ``filename`` is cached with exactly ``file_hash``."""
        entry = self.meta.get(filename)
        return entry is not None and entry["hash"] == file_hash

    def get_embedding(self, filename: str):
        """Return the stored embedding row for ``filename``.

        Raises:
            KeyError: if ``filename`` has no metadata entry.
        """
        idx = int(self.meta[filename]["index"])
        return self.embeddings[idx]

    def add_embedding(self, filename: str, file_hash: str, embedding):
        """Store ``embedding`` for ``filename`` and save the cache to disk.

        If ``filename`` is already cached, its existing row is replaced
        instead of a new row being appended, so the matrix does not collect
        orphaned rows.

        Args:
            filename: key under which the embedding is stored.
            file_hash: hash recorded next to the embedding (see ``exists``).
            embedding: array-like vector; it is copied, converted to float32
                and flattened into a single row.

        Raises:
            ValueError: if the cache already holds rows and the embedding
                length differs from the row length.
        """
        # np.array copies, so the cache never aliases the caller's buffer.
        row = np.array(embedding, dtype="float32").reshape(1, -1)
        n_rows = self.embeddings.shape[0]
        if n_rows and row.shape[1] != self.embeddings.shape[1]:
            raise ValueError(
                f"embedding has dimension {row.shape[1]}, "
                f"cache expects {self.embeddings.shape[1]}"
            )

        previous = self.meta.get(filename)
        idx = int(previous["index"]) if previous is not None else -1
        if 0 <= idx < n_rows:
            # Replace on a copy so arrays handed out earlier keep their
            # old contents, as they did when every add built a new array.
            updated = self.embeddings.copy()
            updated[idx] = row[0]
            self.embeddings = updated
        else:
            idx = n_rows
            if n_rows == 0:
                self.embeddings = row
            else:
                self.embeddings = np.vstack([self.embeddings, row])

        self.meta[filename] = {"hash": file_hash, "index": idx}
        self.save()

    def all_embeddings(self):
        """Return the ``(meta, embeddings)`` pair currently held in memory."""
        return self.meta, self.embeddings
|