bioethics-rag / src /metadata_patcher.py
ciorant's picture
Patching metadata
66b97d2
raw
history blame contribute delete
640 Bytes
from src.vector_store import FAISSVectorStore
from typing import Dict
def patch_metadata_for_store(store: FAISSVectorStore, metadata_fixes: Dict[str, Dict]):
    """Patch document metadata in an in-memory FAISSVectorStore and save it.

    Every document in ``store.documents`` whose ``metadata["filename"]``
    appears as a key in ``metadata_fixes`` has its metadata dict updated in
    place with the corresponding fix values (existing keys are overwritten,
    new keys are added). Documents with missing or empty metadata carry no
    filename to match on and are left untouched.

    Args:
        store: Existing FAISSVectorStore instance (modified in place).
        metadata_fixes: Mapping of filename -> metadata dict, e.g.
            ``{"a.pdf": {"authors": "X", "year": "2021"}}``.

    Returns:
        None. ``store.save_index()` is called once after all documents have
        been processed, whether or not any document was patched.
    """
    for doc in store.documents:
        metadata = doc.get("metadata")
        # A document may lack metadata entirely or store it as None; in both
        # cases there is no filename to look up, so skip it rather than crash.
        if not metadata:
            continue
        fixes = metadata_fixes.get(metadata.get("filename"))
        if fixes:
            metadata.update(fixes)
    store.save_index()