# vdoc_rag/app/cache_manager.py
# aditya9128
# Initial commit: VDoc-RAG - Intelligent Document Q&A with RAG
# 4e3cee0
import os
import hashlib
import json
import shutil
# Directory that holds the cached chunk files; it is a "cache" folder located
# next to this module.
CACHE_DIR = os.path.join(os.path.dirname(__file__), "cache")
# Created at import time so the functions below can assume it exists.
os.makedirs(CACHE_DIR, exist_ok=True)
def _hash_file(path: str) -> str:
"""Compute SHA256 fingerprint for a file."""
h = hashlib.sha256()
with open(path, "rb") as f:
while True:
chunk = f.read(8192)
if not chunk:
break
h.update(chunk)
return h.hexdigest()
def get_cache_path(pdf_path: str) -> str:
    """Return the path of the cache file that belongs to *pdf_path*.

    The file name is the SHA-256 digest of the file's contents followed by
    ".json", so the cache entry is keyed by content rather than by the
    file's name or location.
    """
    cache_name = f"{_hash_file(pdf_path)}.json"
    return os.path.join(CACHE_DIR, cache_name)
def save_chunks_to_cache(pdf_path: str, chunks) -> str:
    """Serialize *chunks* as JSON into the cache entry for *pdf_path*.

    The data is first written to a temporary file in the same directory and
    then moved into place with ``os.replace``. If serialization fails midway
    (for example because *chunks* contains a non-JSON-serializable object),
    no truncated cache file is left behind and any previous entry is kept.

    Args:
        pdf_path: Path of the source file; its contents determine the cache
            file name.
        chunks: JSON-serializable data to store.

    Returns:
        The path of the cache file that was written.

    Raises:
        TypeError, ValueError: If *chunks* cannot be serialized by ``json``.
        OSError: If the source file cannot be read or the cache file cannot
            be written.
    """
    path = get_cache_path(pdf_path)
    # The directory is created at import time, but it may have been removed
    # since then; recreating it here is cheap.
    os.makedirs(os.path.dirname(path), exist_ok=True)

    # Include the PID so two processes caching the same document do not
    # write to the same temporary file.
    tmp_path = f"{path}.{os.getpid()}.tmp"
    try:
        with open(tmp_path, "w", encoding="utf-8") as f:
            json.dump(chunks, f, indent=2, ensure_ascii=False)
        os.replace(tmp_path, path)
    finally:
        # After a successful replace the temp file no longer exists; on
        # failure this removes the partial output.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
    return path
def load_chunks_from_cache(pdf_path: str):
    """Load the cached chunks for *pdf_path*, if a usable entry exists.

    Args:
        pdf_path: Path of the source file; its contents determine which
            cache file is looked up.

    Returns:
        The deserialized JSON data, or ``None`` when there is no cache entry
        or the entry cannot be decoded (e.g. a truncated file). Callers can
        treat ``None`` as a cache miss and regenerate the chunks.
    """
    path = get_cache_path(pdf_path)
    try:
        # Open directly instead of checking existence first: the file could
        # be removed between the check and the open.
        with open(path, "r", encoding="utf-8") as f:
            return json.load(f)
    except FileNotFoundError:
        return None
    except (json.JSONDecodeError, UnicodeDecodeError):
        # A corrupt entry is unusable; report it as a miss rather than
        # failing the caller.
        return None
def clear_cache() -> bool:
    """Delete everything in the cache directory and recreate it empty.

    Deletion is best-effort: errors while removing individual entries are
    ignored rather than raised.

    Returns:
        ``True`` if the cache directory is empty afterwards, ``False`` if
        some entries could not be removed.
    """
    shutil.rmtree(CACHE_DIR, ignore_errors=True)
    os.makedirs(CACHE_DIR, exist_ok=True)
    # rmtree suppressed any errors, so inspect the directory to report what
    # actually happened instead of claiming success unconditionally.
    return not os.listdir(CACHE_DIR)