"""On-disk JSON cache of extracted chunks, keyed by the SHA-256 of the source file."""

import hashlib
import json
import os
import shutil
import tempfile

CACHE_DIR = os.path.join(os.path.dirname(__file__), "cache")
os.makedirs(CACHE_DIR, exist_ok=True)


def _hash_file(path: str) -> str:
    """Return the SHA-256 hex digest of the file at *path*.

    The file is read in 8 KiB chunks so large files are never loaded into
    memory at once.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        # iter(callable, sentinel) stops as soon as read() returns b"" (EOF).
        for block in iter(lambda: f.read(8192), b""):
            digest.update(block)
    return digest.hexdigest()


def get_cache_path(pdf_path: str) -> str:
    """Return the cache file path for *pdf_path*.

    The file name is the SHA-256 of the file's content plus ``.json``, so the
    cache entry depends on the content, not on the file's name or location.

    Raises:
        OSError: if *pdf_path* cannot be read for hashing.
    """
    fingerprint = _hash_file(pdf_path)
    return os.path.join(CACHE_DIR, f"{fingerprint}.json")


def save_chunks_to_cache(pdf_path: str, chunks) -> str:
    """Serialize *chunks* as JSON into the cache entry for *pdf_path*.

    The data is written to a temporary file in the cache directory and then
    moved over the final path, so a failed or interrupted write never leaves
    a truncated entry behind.

    Args:
        pdf_path: Path of the source file; its content hash names the entry.
        chunks: Any JSON-serializable object.

    Returns:
        The path of the cache file that was written.

    Raises:
        TypeError: if *chunks* is not JSON-serializable.
        OSError: if the source file or the cache directory is not accessible.
    """
    path = get_cache_path(pdf_path)
    # The directory may have been removed since import time.
    os.makedirs(CACHE_DIR, exist_ok=True)

    # Create the temp file in the same directory so os.replace() stays on
    # one filesystem and is therefore an atomic rename.
    fd, tmp_path = tempfile.mkstemp(dir=CACHE_DIR, prefix=".tmp-", suffix=".tmp")
    try:
        with os.fdopen(fd, "w", encoding="utf-8") as f:
            json.dump(chunks, f, indent=2, ensure_ascii=False)
        os.replace(tmp_path, path)
    except BaseException:
        # Best-effort cleanup of the partial temp file; the original error
        # is what the caller needs to see, so it is always re-raised.
        try:
            os.unlink(tmp_path)
        except OSError:
            pass
        raise
    return path


def load_chunks_from_cache(pdf_path: str):
    """Return the cached chunks for *pdf_path*, or ``None`` on a cache miss.

    A missing entry and an unreadable (corrupt / non-UTF-8) entry are both
    reported as a miss so the caller can simply recompute and re-save.

    Raises:
        OSError: if *pdf_path* itself cannot be read for hashing.
    """
    # Computed outside the try block so that a missing *source* file still
    # raises instead of being mistaken for a cache miss.
    path = get_cache_path(pdf_path)
    try:
        with open(path, "r", encoding="utf-8") as f:
            return json.load(f)
    except FileNotFoundError:
        # No entry (or it was cleared between hashing and opening).
        return None
    except (json.JSONDecodeError, UnicodeDecodeError):
        # Corrupt entry: treat as a miss rather than failing the caller.
        return None


def clear_cache() -> bool:
    """Delete everything in the cache directory and recreate it empty.

    Returns:
        ``True`` if the cache directory is empty afterwards, ``False`` if
        some entries could not be removed (removal errors are ignored).
    """
    shutil.rmtree(CACHE_DIR, ignore_errors=True)
    os.makedirs(CACHE_DIR, exist_ok=True)
    return not os.listdir(CACHE_DIR)