File size: 1,206 Bytes
4e3cee0 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 | import os
import hashlib
import json
import shutil
# Cache entries are kept in a "cache" folder located beside this module.
_MODULE_DIR = os.path.dirname(__file__)
CACHE_DIR = os.path.join(_MODULE_DIR, "cache")

# Create the folder at import time; an already existing folder is fine.
os.makedirs(CACHE_DIR, exist_ok=True)
def _hash_file(path: str) -> str:
"""Compute SHA256 fingerprint for a file."""
h = hashlib.sha256()
with open(path, "rb") as f:
while True:
chunk = f.read(8192)
if not chunk:
break
h.update(chunk)
return h.hexdigest()
def get_cache_path(pdf_path: str) -> str:
    """Return the path of the cache entry that belongs to *pdf_path*.

    The entry is named after the SHA-256 hex digest of the file's bytes,
    with a ".json" suffix, and is located inside CACHE_DIR. The file at
    *pdf_path* is read to compute the digest; the cache entry itself is
    neither created nor checked for existence here.
    """
    cache_name = _hash_file(pdf_path) + ".json"
    return os.path.join(CACHE_DIR, cache_name)
def save_chunks_to_cache(pdf_path: str, chunks) -> str:
    """Write *chunks* as JSON to the cache entry for *pdf_path*.

    The data is first written to a temporary file in the same directory and
    then moved over the final path with ``os.replace``. A failure part-way
    through (for example, *chunks* containing a value ``json.dump`` cannot
    serialize) therefore never leaves a truncated entry behind and never
    destroys a previously saved, valid entry.

    Args:
        pdf_path: Path of the source file; its content hash names the entry.
        chunks: Any object that ``json.dump`` can serialize.

    Returns:
        The path of the cache file that was written.

    Raises:
        TypeError, ValueError: If *chunks* cannot be serialized to JSON.
        OSError: If the source file cannot be read or the entry cannot be
            written.
    """
    path = get_cache_path(pdf_path)

    # The cache directory is created at import time, but it may have been
    # removed since then; recreating it is cheap and harmless.
    os.makedirs(os.path.dirname(path), exist_ok=True)

    # Include the process id so that concurrent writers in different
    # processes do not share a temporary file.
    tmp_path = f"{path}.{os.getpid()}.tmp"
    try:
        with open(tmp_path, "w", encoding="utf-8") as f:
            json.dump(chunks, f, indent=2, ensure_ascii=False)
        # Same-directory rename: replaces any existing entry in one step.
        os.replace(tmp_path, path)
    except BaseException:
        # Clean up the partial temporary file, then let the error propagate.
        try:
            os.remove(tmp_path)
        except OSError:
            pass
        raise
    return path
def load_chunks_from_cache(pdf_path: str):
    """Return the cached chunks for *pdf_path*, or ``None`` on a cache miss.

    The entry is opened directly instead of being checked with
    ``os.path.exists`` first, so an entry that disappears between the check
    and the read (for example because the cache is being cleared) counts as
    a miss rather than raising. An entry that exists but does not contain
    valid UTF-8 JSON is also treated as a miss, so the caller can simply
    regenerate it.

    Args:
        pdf_path: Path of the source file; its content hash names the entry.

    Returns:
        The deserialized JSON value, or ``None`` when no usable entry exists.
        Note that a cached JSON ``null`` is indistinguishable from a miss.

    Raises:
        OSError: If the source file at *pdf_path* cannot be read for hashing.
    """
    # Kept outside the try block: a missing *source* file is a caller error
    # and must not be mistaken for a cache miss.
    path = get_cache_path(pdf_path)
    try:
        with open(path, "r", encoding="utf-8") as f:
            return json.load(f)
    except FileNotFoundError:
        # No entry has been saved for this file content.
        return None
    except (json.JSONDecodeError, UnicodeDecodeError):
        # Corrupt or truncated entry: report a miss instead of failing.
        return None
def clear_cache() -> bool:
    """Delete everything in the cache directory and recreate it empty.

    Removal is best-effort: errors raised while deleting are ignored rather
    than propagated. The return value reports whether the clean-up actually
    succeeded instead of being unconditionally ``True``.

    Returns:
        ``True`` if the recreated cache directory is empty afterwards,
        ``False`` if some entries could not be removed or the directory
        cannot be listed.
    """
    shutil.rmtree(CACHE_DIR, ignore_errors=True)
    os.makedirs(CACHE_DIR, exist_ok=True)
    try:
        # Anything still present means rmtree silently failed on it.
        return not os.listdir(CACHE_DIR)
    except OSError:
        return False
|